diff --git "a/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb" "b/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb"
--- "a/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb"
+++ "b/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb"
@@ -3,13 +3,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "46576376",
+   "id": "f1d7ccda",
    "metadata": {
     "papermill": {
-     "duration": 0.00263,
-     "end_time": "2023-09-02T08:41:08.904462",
+     "duration": 0.002567,
+     "end_time": "2023-09-02T06:16:28.958047",
      "exception": false,
-     "start_time": "2023-09-02T08:41:08.901832",
+     "start_time": "2023-09-02T06:16:28.955480",
      "status": "completed"
     },
     "tags": []
@@ -28,13 +28,13 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "id": "85f150d0",
+   "id": "54efc4fa",
    "metadata": {
     "papermill": {
-     "duration": 0.001681,
-     "end_time": "2023-09-02T08:41:08.908102",
+     "duration": 0.001679,
+     "end_time": "2023-09-02T06:16:28.961787",
      "exception": false,
-     "start_time": "2023-09-02T08:41:08.906421",
+     "start_time": "2023-09-02T06:16:28.960108",
      "status": "completed"
     },
     "tags": []
@@ -46,19 +46,19 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "id": "f904d7f9",
+   "id": "8b4caf1b",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-02T08:41:08.912507Z",
-     "iopub.status.busy": "2023-09-02T08:41:08.912330Z",
-     "iopub.status.idle": "2023-09-02T08:41:09.629380Z",
-     "shell.execute_reply": "2023-09-02T08:41:09.628542Z"
+     "iopub.execute_input": "2023-09-02T06:16:28.967200Z",
+     "iopub.status.busy": "2023-09-02T06:16:28.966328Z",
+     "iopub.status.idle": "2023-09-02T06:16:29.684585Z",
+     "shell.execute_reply": "2023-09-02T06:16:29.683732Z"
     },
     "papermill": {
-     "duration": 0.721494,
-     "end_time": "2023-09-02T08:41:09.631404",
+     "duration": 0.722814,
+     "end_time": "2023-09-02T06:16:29.686619",
      "exception": false,
-     "start_time": "2023-09-02T08:41:08.909910",
+     "start_time": "2023-09-02T06:16:28.963805",
      "status": "completed"
     },
     "tags": []
@@ -74,19 +74,19 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "id": "222e5b4f",
+   "id": "6cc3a721",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-02T08:41:09.636778Z",
-     "iopub.status.busy": "2023-09-02T08:41:09.636579Z",
-     "iopub.status.idle": "2023-09-02T08:41:12.546148Z",
-     "shell.execute_reply": "2023-09-02T08:41:12.545350Z"
+     "iopub.execute_input": "2023-09-02T06:16:29.691742Z",
+     "iopub.status.busy": "2023-09-02T06:16:29.691539Z",
+     "iopub.status.idle": "2023-09-02T06:16:32.527158Z",
+     "shell.execute_reply": "2023-09-02T06:16:32.526387Z"
     },
     "papermill": {
-     "duration": 2.914378,
-     "end_time": "2023-09-02T08:41:12.548071",
+     "duration": 2.840419,
+     "end_time": "2023-09-02T06:16:32.529061",
      "exception": false,
-     "start_time": "2023-09-02T08:41:09.633693",
+     "start_time": "2023-09-02T06:16:29.688642",
      "status": "completed"
     },
     "tags": []
@@ -118,19 +118,19 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "id": "52f966a7",
+   "id": "6c3b0fbe",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-02T08:41:12.553617Z",
-     "iopub.status.busy": "2023-09-02T08:41:12.553422Z",
-     "iopub.status.idle": "2023-09-02T08:41:12.560017Z",
-     "shell.execute_reply": "2023-09-02T08:41:12.559418Z"
+     "iopub.execute_input": "2023-09-02T06:16:32.534904Z",
+     "iopub.status.busy": "2023-09-02T06:16:32.534704Z",
+     "iopub.status.idle": "2023-09-02T06:16:32.541251Z",
+     "shell.execute_reply": "2023-09-02T06:16:32.540683Z"
     },
     "papermill": {
-     "duration": 0.010711,
-     "end_time": "2023-09-02T08:41:12.561162",
+     "duration": 0.010568,
+     "end_time": "2023-09-02T06:16:32.542402",
      "exception": false,
-     "start_time": "2023-09-02T08:41:12.550451",
+     "start_time": "2023-09-02T06:16:32.531834",
      "status": "completed"
     },
     "tags": []
@@ -193,19 +193,19 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "id": "5969a736",
+   "id": "2bb9275c",
    "metadata": {
     "execution": {
-     "iopub.execute_input": "2023-09-02T08:41:12.566119Z",
-     "iopub.status.busy": "2023-09-02T08:41:12.565958Z",
-     "iopub.status.idle": "2023-09-02T08:41:46.796618Z",
-     "shell.execute_reply": "2023-09-02T08:41:46.795641Z"
+     "iopub.execute_input": "2023-09-02T06:16:32.547321Z",
+     "iopub.status.busy": "2023-09-02T06:16:32.547162Z",
+     "iopub.status.idle": "2023-09-02T06:17:41.766730Z",
+     "shell.execute_reply": "2023-09-02T06:17:41.765728Z"
     },
     "papermill": {
-     "duration": 34.234865,
-     "end_time": "2023-09-02T08:41:46.798178",
+     "duration": 69.223979,
+     "end_time": "2023-09-02T06:17:41.768492",
      "exception": false,
-     "start_time": "2023-09-02T08:41:12.563313",
+     "start_time": "2023-09-02T06:16:32.544513",
      "status": "completed"
     },
     "tags": []
@@ -215,7 +215,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "--2023-09-02 08:41:12--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E0_1-mem-ctx-4k.pth\r\n",
+      "--2023-09-02 06:16:32--  https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E0_1-mem-ctx-4k.pth\r\n",
       "Resolving huggingface.co (huggingface.co)... "
      ]
     },
@@ -223,8 +223,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "18.165.122.11, 18.165.122.30, 18.165.122.101, ...\r\n",
-      "Connecting to huggingface.co (huggingface.co)|18.165.122.11|:443... connected.\r\n",
+      "18.165.122.101, 18.165.122.11, 18.165.122.120, ...\r\n",
+      "Connecting to huggingface.co (huggingface.co)|18.165.122.101|:443... connected.\r\n",
       "HTTP request sent, awaiting response... "
      ]
     },
@@ -233,8 +233,8 @@
      "output_type": "stream",
      "text": [
       "302 Found\r\n",
-      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/20fb328bd798a1e23967f80661a0b6a277f1d45ed2d90760cec68d32dfcbd516?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L96-D1024-E0_1-mem-ctx-4k.pth%3B+filename%3D%22v5-L96-D1024-E0_1-mem-ctx-4k.pth%22%3B&Expires=1693903272&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5MzkwMzI3Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzIwZmIzMjhiZDc5OGExZTIzOTY3ZjgwNjYxYTBiNmEyNzdmMWQ0NWVkMmQ5MDc2MGNlYzY4ZDMyZGZjYmQ1MTY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=XYUEm5rWRl8TZW2GseMQ7CZsoQWK%7ExizTrnu71lLngQQTBmQGTJGqqKHVXkB3N%7E-l3HrQnY-2UE1xVGJAqeSSkvBLsQO84qIhPYkt2sOJKLMFYyidxXiI%7EPdnW9XJw38lLiZWkBFo4HJEaU-L4Wa%7Ej2zz19V19fUHN01xqokVHF7EJgvgltH7R9MrBIW16GVNkX5GPy16UdOOykn12Eag5OerEkjigfYVxOtHy5cEOSjXjRZHLm8bFzUFe9K7jGNCdddmNebAQwCl%7E1dLXeCQya6rDrNtjse52R5tzuPCKM8dcyaR5vv2ZznDl3Njk1Lo7KbFZ3O3XYmE9XeAYId%7Eg__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
-      "--2023-09-02 08:41:12--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/20fb328bd798a1e23967f80661a0b6a277f1d45ed2d90760cec68d32dfcbd516?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L96-D1024-E0_1-mem-ctx-4k.pth%3B+filename%3D%22v5-L96-D1024-E0_1-mem-ctx-4k.pth%22%3B&Expires=1693903272&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5MzkwMzI3Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzIwZmIzMjhiZDc5OGExZTIzOTY3ZjgwNjYxYTBiNmEyNzdmMWQ0NWVkMmQ5MDc2MGNlYzY4ZDMyZGZjYmQ1MTY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=XYUEm5rWRl8TZW2GseMQ7CZsoQWK%7ExizTrnu71lLngQQTBmQGTJGqqKHVXkB3N%7E-l3HrQnY-2UE1xVGJAqeSSkvBLsQO84qIhPYkt2sOJKLMFYyidxXiI%7EPdnW9XJw38lLiZWkBFo4HJEaU-L4Wa%7Ej2zz19V19fUHN01xqokVHF7EJgvgltH7R9MrBIW16GVNkX5GPy16UdOOykn12Eag5OerEkjigfYVxOtHy5cEOSjXjRZHLm8bFzUFe9K7jGNCdddmNebAQwCl%7E1dLXeCQya6rDrNtjse52R5tzuPCKM8dcyaR5vv2ZznDl3Njk1Lo7KbFZ3O3XYmE9XeAYId%7Eg__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
+      "Location: https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/20fb328bd798a1e23967f80661a0b6a277f1d45ed2d90760cec68d32dfcbd516?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L96-D1024-E0_1-mem-ctx-4k.pth%3B+filename%3D%22v5-L96-D1024-E0_1-mem-ctx-4k.pth%22%3B&Expires=1693894592&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5Mzg5NDU5Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzIwZmIzMjhiZDc5OGExZTIzOTY3ZjgwNjYxYTBiNmEyNzdmMWQ0NWVkMmQ5MDc2MGNlYzY4ZDMyZGZjYmQ1MTY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=0lvghwVju7ugxwlMlvj%7EIeK4lhVAMO1bcdYQsP0FxGPt5AK88smuTfGJvprL1l1HiQVj4a7f6U8yvc97nQxIdWdTF0RC8s4SAyFcZoupxdMbzOQ7kEr7OgXQPIdUhXWFAQN8Jrd-gkTfbl1ZAR2HjikcsW0MZI2do5UoTezTx6aBHfDhSwsx0SOCKOXHdWmyvofJts71OggEykKwEqY0AH3CHBro5v88luav9avT6Ha3nrtoJQKwKBxYLkC5RVwbsw7L6Br%7EV14-DYVOr1EIrDRifwMfNjtbDrx5eoJiXJF4mpngng5zcaDIofe1LUprJ8oOglZbD8IwmUJyW6JiRw__&Key-Pair-Id=KVTP0A1DKRTAX [following]\r\n",
+      "--2023-09-02 06:16:32--  https://cdn-lfs.huggingface.co/repos/2e/f7/2ef78555202aa92abdbdf476ce3d0fd5a8b15f7245edf0b80d4d30572355f30d/20fb328bd798a1e23967f80661a0b6a277f1d45ed2d90760cec68d32dfcbd516?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27v5-L96-D1024-E0_1-mem-ctx-4k.pth%3B+filename%3D%22v5-L96-D1024-E0_1-mem-ctx-4k.pth%22%3B&Expires=1693894592&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTY5Mzg5NDU5Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5odWdnaW5nZmFjZS5jby9yZXBvcy8yZS9mNy8yZWY3ODU1NTIwMmFhOTJhYmRiZGY0NzZjZTNkMGZkNWE4YjE1ZjcyNDVlZGYwYjgwZDRkMzA1NzIzNTVmMzBkLzIwZmIzMjhiZDc5OGExZTIzOTY3ZjgwNjYxYTBiNmEyNzdmMWQ0NWVkMmQ5MDc2MGNlYzY4ZDMyZGZjYmQ1MTY%7EcmVzcG9uc2UtY29udGVudC1kaXNwb3NpdGlvbj0qIn1dfQ__&Signature=0lvghwVju7ugxwlMlvj%7EIeK4lhVAMO1bcdYQsP0FxGPt5AK88smuTfGJvprL1l1HiQVj4a7f6U8yvc97nQxIdWdTF0RC8s4SAyFcZoupxdMbzOQ7kEr7OgXQPIdUhXWFAQN8Jrd-gkTfbl1ZAR2HjikcsW0MZI2do5UoTezTx6aBHfDhSwsx0SOCKOXHdWmyvofJts71OggEykKwEqY0AH3CHBro5v88luav9avT6Ha3nrtoJQKwKBxYLkC5RVwbsw7L6Br%7EV14-DYVOr1EIrDRifwMfNjtbDrx5eoJiXJF4mpngng5zcaDIofe1LUprJ8oOglZbD8IwmUJyW6JiRw__&Key-Pair-Id=KVTP0A1DKRTAX\r\n",
       "Resolving cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)... "
      ]
     },
@@ -242,15 +242,16 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "108.156.22.119, 108.156.22.7, 108.156.22.58, ...\r\n",
-      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.156.22.119|:443... connected.\r\n"
+      "108.156.22.119, 108.156.22.58, 108.156.22.7, ...\r\n",
+      "Connecting to cdn-lfs.huggingface.co (cdn-lfs.huggingface.co)|108.156.22.119|:443... connected.\r\n",
+      "HTTP request sent, awaiting response... "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "HTTP request sent, awaiting response... 200 OK\r\n",
+      "200 OK\r\n",
       "Length: 2825976699 (2.6G) [binary/octet-stream]\r\n",
       "Saving to: ‘v5-L96-D1024-E0_1-mem-ctx-4k.pth’\r\n",
       "\r\n",
@@ -263,7 +264,343 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "         v5-L96-D10   0%[                    ]  13.31M  66.4MB/s               "
+      "         v5-L96-D10   0%[                    ] 128.27K   472KB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "        v5-L96-D102   0%[                    ]   1.18M  2.49MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "       v5-L96-D1024   0%[                    ]   3.65M  5.41MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "      v5-L96-D1024-   0%[                    ]  10.10M  11.5MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "     v5-L96-D1024-E   0%[                    ]  20.48M  18.5MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "    v5-L96-D1024-E0   1%[                    ]  28.53M  21.6MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "   v5-L96-D1024-E0_   1%[                    ]  38.20M  25.0MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "  v5-L96-D1024-E0_1   1%[                    ]  45.14M  26.1MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      " v5-L96-D1024-E0_1-   1%[                    ]  53.31M  27.3MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "v5-L96-D1024-E0_1-m   2%[                    ]  61.80M  28.7MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "5-L96-D1024-E0_1-me   2%[                    ]  68.40M  29.1MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-L96-D1024-E0_1-mem   2%[                    ]  78.76M  30.8MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "L96-D1024-E0_1-mem-   3%[                    ]  85.85M  31.1MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "96-D1024-E0_1-mem-c   3%[                    ]  95.06M  31.9MB/s               "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "6-D1024-E0_1-mem-ct   3%[                    ] 103.00M  32.3MB/s    eta 80s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-D1024-E0_1-mem-ctx   4%[                    ] 110.60M  32.4MB/s    eta 80s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "D1024-E0_1-mem-ctx-   4%[                    ] 121.50M  36.9MB/s    eta 80s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1024-E0_1-mem-ctx-4   4%[                    ] 130.07M  39.0MB/s    eta 80s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "024-E0_1-mem-ctx-4k   5%[>                   ] 137.57M  39.9MB/s    eta 80s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "24-E0_1-mem-ctx-4k.   5%[>                   ] 147.07M  40.5MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "4-E0_1-mem-ctx-4k.p   5%[>                   ] 156.38M  40.1MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-E0_1-mem-ctx-4k.pt   6%[>                   ] 164.59M  40.0MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "E0_1-mem-ctx-4k.pth   6%[>                   ] 171.63M  39.2MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "0_1-mem-ctx-4k.pth    6%[>                   ] 180.27M  40.0MB/s    eta 74s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "_1-mem-ctx-4k.pth     6%[>                   ] 187.18M  39.2MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "1-mem-ctx-4k.pth      7%[>                   ] 193.83M  39.0MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-mem-ctx-4k.pth       7%[>                   ] 202.32M  39.3MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "mem-ctx-4k.pth        7%[>                   ] 210.41M  39.0MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "em-ctx-4k.pth         8%[>                   ] 219.40M  39.4MB/s    eta 71s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "m-ctx-4k.pth          8%[>                   ] 226.92M  39.2MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-ctx-4k.pth           8%[>                   ] 235.87M  39.2MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "ctx-4k.pth            9%[>                   ] 244.37M  39.2MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "tx-4k.pth             9%[>                   ] 252.94M  39.4MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "x-4k.pth              9%[>                   ] 260.46M  39.1MB/s    eta 69s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "-4k.pth               9%[>                   ] 268.38M  39.0MB/s    eta 67s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "4k.pth               10%[=>                  ] 278.50M  39.2MB/s    eta 67s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "k.pth                10%[=>                  ] 286.82M  39.2MB/s    eta 67s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      ".pth                 10%[=>                  ] 294.98M  39.1MB/s    eta 67s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "pth                  11%[=>                  ] 303.99M  40.1MB/s    eta 67s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "th                   11%[=>                  ] 312.18M  39.9MB/s    eta 64s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "h                    11%[=>                  ] 320.37M  40.7MB/s    eta 64s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                     12%[=>                  ] 327.41M  40.2MB/s    eta 64s    "
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\r",
+      "                  v  12%[=>                  ] 334.85M  40.1MB/s    eta 64s    "
      ]
     },
     {
@@ -271,7 +608,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "        v5-L96-D102   1%[                    ]  28.83M  72.0MB/s               "
+      "                 v5  12%[=>                  ] 343.29M  40.2MB/s    eta 64s    "
      ]
     },
     {
@@ -279,7 +616,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "       v5-L96-D1024   1%[                    ]  46.59M  77.6MB/s               "
+      "                v5-  12%[=>                  ] 350.02M  39.8MB/s    eta 63s    "
      ]
     },
     {
@@ -287,7 +624,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "      v5-L96-D1024-   2%[                    ]  64.08M  80.0MB/s               "
+      "               v5-L  13%[=>                  ] 359.43M  40.0MB/s    eta 63s    "
      ]
     },
     {
@@ -295,7 +632,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "     v5-L96-D1024-E   3%[                    ]  81.49M  81.4MB/s               "
+      "              v5-L9  13%[=>                  ] 366.85M  39.7MB/s    eta 63s    "
      ]
     },
     {
@@ -303,7 +640,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "    v5-L96-D1024-E0   3%[                    ]  96.12M  79.1MB/s               "
+      "             v5-L96  13%[=>                  ] 374.94M  39.6MB/s    eta 63s    "
      ]
     },
     {
@@ -311,7 +648,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "   v5-L96-D1024-E0_   4%[                    ] 111.73M  78.9MB/s               "
+      "            v5-L96-  14%[=>                  ] 383.94M  39.8MB/s    eta 63s    "
      ]
     },
     {
@@ -319,7 +656,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "  v5-L96-D1024-E0_1   4%[                    ] 128.42M  79.5MB/s               "
+      "           v5-L96-D  14%[=>                  ] 389.22M  39.0MB/s    eta 62s    "
      ]
     },
     {
@@ -327,7 +664,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      " v5-L96-D1024-E0_1-   5%[>                   ] 147.05M  81.0MB/s               "
+      "          v5-L96-D1  14%[=>                  ] 399.01M  39.5MB/s    eta 62s    "
      ]
     },
     {
@@ -335,7 +672,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "v5-L96-D1024-E0_1-m   6%[>                   ] 162.80M  80.7MB/s               "
+      "         v5-L96-D10  14%[=>                  ] 400.87M  37.2MB/s    eta 62s    "
      ]
     },
     {
@@ -343,7 +680,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "5-L96-D1024-E0_1-me   6%[>                   ] 180.14M  81.3MB/s               "
+      "        v5-L96-D102  15%[==>                 ] 409.16M  37.3MB/s    eta 62s    "
      ]
     },
     {
@@ -351,7 +688,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-L96-D1024-E0_1-mem   7%[>                   ] 196.70M  81.4MB/s               "
+      "       v5-L96-D1024  15%[==>                 ] 422.11M  38.5MB/s    eta 62s    "
      ]
     },
     {
@@ -359,7 +696,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "L96-D1024-E0_1-mem-   7%[>                   ] 212.43M  81.2MB/s               "
+      "      v5-L96-D1024-  15%[==>                 ] 430.96M  38.8MB/s    eta 60s    "
      ]
     },
     {
@@ -367,7 +704,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "96-D1024-E0_1-mem-c   8%[>                   ] 230.28M  81.7MB/s               "
+      "     v5-L96-D1024-E  16%[==>                 ] 438.90M  38.4MB/s    eta 60s    "
      ]
     },
     {
@@ -375,7 +712,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "6-D1024-E0_1-mem-ct   9%[>                   ] 248.08M  82.2MB/s    eta 30s    "
+      "    v5-L96-D1024-E0  16%[==>                 ] 448.99M  39.2MB/s    eta 60s    "
      ]
     },
     {
@@ -383,7 +720,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-D1024-E0_1-mem-ctx   9%[>                   ] 263.31M  82.6MB/s    eta 30s    "
+      "   v5-L96-D1024-E0_  16%[==>                 ] 454.42M  38.5MB/s    eta 60s    "
      ]
     },
     {
@@ -391,7 +728,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "D1024-E0_1-mem-ctx-  10%[=>                  ] 280.14M  83.0MB/s    eta 30s    "
+      "  v5-L96-D1024-E0_1  17%[==>                 ] 463.45M  39.3MB/s    eta 60s    "
      ]
     },
     {
@@ -399,7 +736,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "1024-E0_1-mem-ctx-4  11%[=>                  ] 297.06M  83.3MB/s    eta 30s    "
+      " v5-L96-D1024-E0_1-  17%[==>                 ] 472.14M  39.5MB/s    eta 59s    "
      ]
     },
     {
@@ -407,7 +744,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "024-E0_1-mem-ctx-4k  11%[=>                  ] 313.79M  82.9MB/s    eta 30s    "
+      "v5-L96-D1024-E0_1-m  17%[==>                 ] 480.86M  39.7MB/s    eta 59s    "
      ]
     },
     {
@@ -415,7 +752,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "24-E0_1-mem-ctx-4k.  12%[=>                  ] 331.17M  82.9MB/s    eta 29s    "
+      "5-L96-D1024-E0_1-me  18%[==>                 ] 488.78M  39.6MB/s    eta 59s    "
      ]
     },
     {
@@ -423,7 +760,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "4-E0_1-mem-ctx-4k.p  12%[=>                  ] 346.16M  82.7MB/s    eta 29s    "
+      "-L96-D1024-E0_1-mem  18%[==>                 ] 498.34M  40.4MB/s    eta 59s    "
      ]
     },
     {
@@ -431,7 +768,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-E0_1-mem-ctx-4k.pt  13%[=>                  ] 364.67M  83.4MB/s    eta 29s    "
+      "L96-D1024-E0_1-mem-  18%[==>                 ] 507.16M  40.0MB/s    eta 59s    "
      ]
     },
     {
@@ -439,7 +776,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "E0_1-mem-ctx-4k.pth  14%[=>                  ] 382.49M  83.9MB/s    eta 29s    "
+      "96-D1024-E0_1-mem-c  19%[==>                 ] 515.72M  40.0MB/s    eta 57s    "
      ]
     },
     {
@@ -447,7 +784,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "0_1-mem-ctx-4k.pth   14%[=>                  ] 399.47M  83.7MB/s    eta 29s    "
+      "6-D1024-E0_1-mem-ct  19%[==>                 ] 523.23M  40.3MB/s    eta 57s    "
      ]
     },
     {
@@ -455,7 +792,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "_1-mem-ctx-4k.pth    15%[==>                 ] 413.53M  82.3MB/s    eta 28s    "
+      "-D1024-E0_1-mem-ctx  19%[==>                 ] 530.88M  40.2MB/s    eta 57s    "
      ]
     },
     {
@@ -463,7 +800,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "1-mem-ctx-4k.pth     15%[==>                 ] 430.24M  82.7MB/s    eta 28s    "
+      "D1024-E0_1-mem-ctx-  20%[===>                ] 541.15M  42.6MB/s    eta 57s    "
      ]
     },
     {
@@ -471,7 +808,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-mem-ctx-4k.pth      16%[==>                 ] 440.09M  79.6MB/s    eta 28s    "
+      "1024-E0_1-mem-ctx-4  20%[===>                ] 549.61M  40.9MB/s    eta 57s    "
      ]
     },
     {
@@ -479,7 +816,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "mem-ctx-4k.pth       16%[==>                 ] 451.39M  78.5MB/s    eta 28s    "
+      "024-E0_1-mem-ctx-4k  20%[===>                ] 557.83M  40.4MB/s    eta 56s    "
      ]
     },
     {
@@ -487,7 +824,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "em-ctx-4k.pth        17%[==>                 ] 468.42M  78.4MB/s    eta 28s    "
+      "24-E0_1-mem-ctx-4k.  21%[===>                ] 566.19M  40.5MB/s    eta 56s    "
      ]
     },
     {
@@ -495,7 +832,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "m-ctx-4k.pth         17%[==>                 ] 485.02M  76.8MB/s    eta 28s    "
+      "4-E0_1-mem-ctx-4k.p  21%[===>                ] 576.30M  40.9MB/s    eta 56s    "
      ]
     },
     {
@@ -503,7 +840,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-ctx-4k.pth          18%[==>                 ] 497.70M  75.9MB/s    eta 28s    "
+      "-E0_1-mem-ctx-4k.pt  21%[===>                ] 584.55M  40.6MB/s    eta 56s    "
      ]
     },
     {
@@ -511,7 +848,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "ctx-4k.pth           19%[==>                 ] 517.39M  76.9MB/s    eta 28s    "
+      "E0_1-mem-ctx-4k.pth  22%[===>                ] 593.19M  40.8MB/s    eta 56s    "
      ]
     },
     {
@@ -519,7 +856,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "tx-4k.pth            19%[==>                 ] 532.44M  76.4MB/s    eta 28s    "
+      "0_1-mem-ctx-4k.pth   22%[===>                ] 601.02M  41.2MB/s    eta 55s    "
      ]
     },
     {
@@ -527,7 +864,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "x-4k.pth             20%[===>                ] 546.53M  75.4MB/s    eta 28s    "
+      "_1-mem-ctx-4k.pth    22%[===>                ] 607.94M  40.3MB/s    eta 55s    "
      ]
     },
     {
@@ -535,7 +872,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-4k.pth              20%[===>                ] 563.48M  75.1MB/s    eta 27s    "
+      "1-mem-ctx-4k.pth     22%[===>                ] 617.53M  40.9MB/s    eta 55s    "
      ]
     },
     {
@@ -543,7 +880,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "4k.pth               21%[===>                ] 582.10M  75.1MB/s    eta 27s    "
+      "-mem-ctx-4k.pth      23%[===>                ] 626.19M  41.1MB/s    eta 55s    "
      ]
     },
     {
@@ -551,7 +888,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "k.pth                22%[===>                ] 599.29M  75.2MB/s    eta 27s    "
+      "mem-ctx-4k.pth       23%[===>                ] 631.94M  40.2MB/s    eta 55s    "
      ]
     },
     {
@@ -559,7 +896,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      ".pth                 22%[===>                ] 617.27M  75.5MB/s    eta 27s    "
+      "em-ctx-4k.pth        23%[===>                ] 642.25M  40.8MB/s    eta 53s    "
      ]
     },
     {
@@ -567,7 +904,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "pth                  23%[===>                ] 632.54M  74.8MB/s    eta 27s    "
+      "m-ctx-4k.pth         24%[===>                ] 650.46M  40.2MB/s    eta 53s    "
      ]
     },
     {
@@ -575,7 +912,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "th                   24%[===>                ] 650.42M  75.1MB/s    eta 26s    "
+      "-ctx-4k.pth          24%[===>                ] 660.40M  41.1MB/s    eta 53s    "
      ]
     },
     {
@@ -583,7 +920,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "h                    24%[===>                ] 665.16M  74.5MB/s    eta 26s    "
+      "ctx-4k.pth           24%[===>                ] 669.04M  41.3MB/s    eta 53s    "
      ]
     },
     {
@@ -591,7 +928,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                     25%[====>               ] 680.40M  75.1MB/s    eta 26s    "
+      "tx-4k.pth            25%[====>               ] 677.22M  40.6MB/s    eta 53s    "
      ]
     },
     {
@@ -599,7 +936,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                  v  25%[====>               ] 696.57M  77.9MB/s    eta 26s    "
+      "x-4k.pth             25%[====>               ] 685.40M  40.8MB/s    eta 52s    "
      ]
     },
     {
@@ -607,7 +944,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                 v5  26%[====>               ] 715.45M  79.1MB/s    eta 26s    "
+      "-4k.pth              25%[====>               ] 692.90M  40.2MB/s    eta 52s    "
      ]
     },
     {
@@ -615,7 +952,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                v5-  27%[====>               ] 732.12M  79.5MB/s    eta 25s    "
+      "4k.pth               26%[====>               ] 701.72M  40.5MB/s    eta 52s    "
      ]
     },
     {
@@ -623,7 +960,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "               v5-L  27%[====>               ] 748.81M  81.2MB/s    eta 25s    "
+      "k.pth                26%[====>               ] 710.47M  40.8MB/s    eta 52s    "
      ]
     },
     {
@@ -631,7 +968,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "              v5-L9  28%[====>               ] 765.59M  80.9MB/s    eta 25s    "
+      ".pth                 26%[====>               ] 719.50M  40.4MB/s    eta 52s    "
      ]
     },
     {
@@ -639,7 +976,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "             v5-L96  29%[====>               ] 783.92M  81.8MB/s    eta 25s    "
+      "pth                  26%[====>               ] 727.67M  40.6MB/s    eta 51s    "
      ]
     },
     {
@@ -647,7 +984,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "            v5-L96-  29%[====>               ] 799.27M  81.9MB/s    eta 25s    "
+      "th                   27%[====>               ] 735.40M  40.5MB/s    eta 51s    "
      ]
     },
     {
@@ -655,7 +992,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "           v5-L96-D  30%[=====>              ] 815.27M  81.7MB/s    eta 24s    "
+      "h                    27%[====>               ] 745.58M  41.2MB/s    eta 51s    "
      ]
     },
     {
@@ -663,7 +1000,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "          v5-L96-D1  30%[=====>              ] 831.37M  81.9MB/s    eta 24s    "
+      "                     27%[====>               ] 754.27M  41.0MB/s    eta 51s    "
      ]
     },
     {
@@ -671,7 +1008,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "         v5-L96-D10  31%[=====>              ] 848.57M  81.6MB/s    eta 24s    "
+      "                  v  28%[====>               ] 762.58M  40.9MB/s    eta 51s    "
      ]
     },
     {
@@ -679,7 +1016,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "        v5-L96-D102  32%[=====>              ] 868.14M  82.7MB/s    eta 24s    "
+      "                 v5  28%[====>               ] 770.36M  40.8MB/s    eta 50s    "
      ]
     },
     {
@@ -687,7 +1024,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "       v5-L96-D1024  32%[=====>              ] 884.10M  82.3MB/s    eta 24s    "
+      "                v5-  28%[====>               ] 778.11M  40.5MB/s    eta 50s    "
      ]
     },
     {
@@ -695,7 +1032,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "      v5-L96-D1024-  33%[=====>              ] 896.71M  80.2MB/s    eta 23s    "
+      "               v5-L  29%[====>               ] 787.60M  40.9MB/s    eta 50s    "
      ]
     },
     {
@@ -703,7 +1040,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "     v5-L96-D1024-E  33%[=====>              ] 913.93M  81.2MB/s    eta 23s    "
+      "              v5-L9  29%[====>               ] 795.51M  40.5MB/s    eta 50s    "
      ]
     },
     {
@@ -711,7 +1048,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "    v5-L96-D1024-E0  34%[=====>              ] 926.25M  80.5MB/s    eta 23s    "
+      "             v5-L96  29%[====>               ] 804.15M  40.9MB/s    eta 50s    "
      ]
     },
     {
@@ -719,7 +1056,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "   v5-L96-D1024-E0_  34%[=====>              ] 942.73M  80.4MB/s    eta 23s    "
+      "            v5-L96-  30%[=====>              ] 811.94M  40.2MB/s    eta 48s    "
      ]
     },
     {
@@ -727,7 +1064,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "  v5-L96-D1024-E0_1  35%[======>             ] 959.37M  80.3MB/s    eta 23s    "
+      "           v5-L96-D  30%[=====>              ] 819.92M  40.3MB/s    eta 48s    "
      ]
     },
     {
@@ -735,7 +1072,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      " v5-L96-D1024-E0_1-  36%[======>             ] 976.46M  80.6MB/s    eta 22s    "
+      "          v5-L96-D1  30%[=====>              ] 828.12M  40.4MB/s    eta 48s    "
      ]
     },
     {
@@ -743,7 +1080,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "v5-L96-D1024-E0_1-m  36%[======>             ] 991.58M  79.6MB/s    eta 22s    "
+      "         v5-L96-D10  31%[=====>              ] 837.98M  40.4MB/s    eta 48s    "
      ]
     },
     {
@@ -751,7 +1088,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "5-L96-D1024-E0_1-me  37%[======>             ]   1007M  79.3MB/s    eta 22s    "
+      "        v5-L96-D102  31%[=====>              ] 846.44M  40.3MB/s    eta 48s    "
      ]
     },
     {
@@ -759,7 +1096,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-L96-D1024-E0_1-mem  38%[======>             ]   1.00G  79.8MB/s    eta 22s    "
+      "       v5-L96-D1024  31%[=====>              ] 855.32M  40.7MB/s    eta 47s    "
      ]
     },
     {
@@ -767,7 +1104,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "L96-D1024-E0_1-mem-  38%[======>             ]   1.02G  79.6MB/s    eta 22s    "
+      "      v5-L96-D1024-  32%[=====>              ] 863.40M  40.7MB/s    eta 47s    "
      ]
     },
     {
@@ -775,7 +1112,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "96-D1024-E0_1-mem-c  39%[======>             ]   1.03G  79.4MB/s    eta 21s    "
+      "     v5-L96-D1024-E  32%[=====>              ] 871.21M  40.2MB/s    eta 47s    "
      ]
     },
     {
@@ -783,7 +1120,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "6-D1024-E0_1-mem-ct  39%[======>             ]   1.03G  73.8MB/s    eta 21s    "
+      "    v5-L96-D1024-E0  32%[=====>              ] 880.97M  41.1MB/s    eta 47s    "
      ]
     },
     {
@@ -791,7 +1128,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-D1024-E0_1-mem-ctx  39%[======>             ]   1.05G  74.1MB/s    eta 21s    "
+      "   v5-L96-D1024-E0_  32%[=====>              ] 889.22M  40.5MB/s    eta 47s    "
      ]
     },
     {
@@ -799,7 +1136,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "D1024-E0_1-mem-ctx-  40%[=======>            ]   1.07G  73.3MB/s    eta 21s    "
+      "  v5-L96-D1024-E0_1  33%[=====>              ] 897.03M  40.2MB/s    eta 46s    "
      ]
     },
     {
@@ -807,7 +1144,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "1024-E0_1-mem-ctx-4  41%[=======>            ]   1.08G  74.0MB/s    eta 21s    "
+      " v5-L96-D1024-E0_1-  33%[=====>              ] 898.93M  38.4MB/s    eta 46s    "
      ]
     },
     {
@@ -815,7 +1152,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "024-E0_1-mem-ctx-4k  41%[=======>            ]   1.10G  74.6MB/s    eta 20s    "
+      "v5-L96-D1024-E0_1-m  33%[=====>              ] 915.07M  40.6MB/s    eta 46s    "
      ]
     },
     {
@@ -823,7 +1160,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "24-E0_1-mem-ctx-4k.  42%[=======>            ]   1.11G  75.3MB/s    eta 20s    "
+      "5-L96-D1024-E0_1-me  34%[=====>              ] 924.14M  40.6MB/s    eta 46s    "
      ]
     },
     {
@@ -831,7 +1168,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "4-E0_1-mem-ctx-4k.p  42%[=======>            ]   1.13G  75.1MB/s    eta 20s    "
+      "-L96-D1024-E0_1-mem  34%[=====>              ] 932.36M  40.8MB/s    eta 46s    "
      ]
     },
     {
@@ -839,7 +1176,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-E0_1-mem-ctx-4k.pt  43%[=======>            ]   1.15G  76.2MB/s    eta 20s    "
+      "L96-D1024-E0_1-mem-  34%[=====>              ] 941.37M  40.7MB/s    eta 45s    "
      ]
     },
     {
@@ -847,7 +1184,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "E0_1-mem-ctx-4k.pth  44%[=======>            ]   1.17G  77.2MB/s    eta 20s    "
+      "96-D1024-E0_1-mem-c  35%[======>             ] 949.66M  40.9MB/s    eta 45s    "
      ]
     },
     {
@@ -855,7 +1192,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "0_1-mem-ctx-4k.pth   44%[=======>            ]   1.18G  77.7MB/s    eta 19s    "
+      "6-D1024-E0_1-mem-ct  35%[======>             ] 958.50M  40.8MB/s    eta 45s    "
      ]
     },
     {
@@ -863,7 +1200,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "_1-mem-ctx-4k.pth    45%[========>           ]   1.20G  78.5MB/s    eta 19s    "
+      "-D1024-E0_1-mem-ctx  35%[======>             ] 966.79M  40.7MB/s    eta 45s    "
      ]
     },
     {
@@ -871,7 +1208,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "1-mem-ctx-4k.pth     46%[========>           ]   1.22G  78.6MB/s    eta 19s    "
+      "D1024-E0_1-mem-ctx-  36%[======>             ] 976.33M  41.2MB/s    eta 45s    "
      ]
     },
     {
@@ -879,7 +1216,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-mem-ctx-4k.pth      46%[========>           ]   1.23G  77.9MB/s    eta 19s    "
+      "1024-E0_1-mem-ctx-4  36%[======>             ] 985.30M  41.5MB/s    eta 44s    "
      ]
     },
     {
@@ -887,7 +1224,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "mem-ctx-4k.pth       47%[========>           ]   1.25G  77.8MB/s    eta 19s    "
+      "024-E0_1-mem-ctx-4k  36%[======>             ] 993.33M  41.0MB/s    eta 44s    "
      ]
     },
     {
@@ -895,7 +1232,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "em-ctx-4k.pth        48%[========>           ]   1.27G  78.0MB/s    eta 18s    "
+      "24-E0_1-mem-ctx-4k.  37%[======>             ]   1002M  41.1MB/s    eta 44s    "
      ]
     },
     {
@@ -903,7 +1240,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "m-ctx-4k.pth         48%[========>           ]   1.28G  81.0MB/s    eta 18s    "
+      "4-E0_1-mem-ctx-4k.p  37%[======>             ]   1011M  41.0MB/s    eta 44s    "
      ]
     },
     {
@@ -911,7 +1248,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-ctx-4k.pth          49%[========>           ]   1.30G  82.9MB/s    eta 18s    "
+      "-E0_1-mem-ctx-4k.pt  37%[======>             ]   1019M  40.9MB/s    eta 44s    "
      ]
     },
     {
@@ -919,7 +1256,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "ctx-4k.pth           50%[=========>          ]   1.32G  83.3MB/s    eta 18s    "
+      "E0_1-mem-ctx-4k.pth  38%[======>             ]   1.00G  40.9MB/s    eta 42s    "
      ]
     },
     {
@@ -927,7 +1264,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "tx-4k.pth            50%[=========>          ]   1.33G  84.6MB/s    eta 18s    "
+      "0_1-mem-ctx-4k.pth   38%[======>             ]   1.01G  42.5MB/s    eta 42s    "
      ]
     },
     {
@@ -935,7 +1272,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "x-4k.pth             51%[=========>          ]   1.35G  84.7MB/s    eta 16s    "
+      "_1-mem-ctx-4k.pth    38%[======>             ]   1.02G  41.9MB/s    eta 42s    "
      ]
     },
     {
@@ -943,7 +1280,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-4k.pth              52%[=========>          ]   1.37G  85.0MB/s    eta 16s    "
+      "1-mem-ctx-4k.pth     39%[======>             ]   1.03G  40.6MB/s    eta 42s    "
      ]
     },
     {
@@ -951,7 +1288,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "4k.pth               52%[=========>          ]   1.38G  85.2MB/s    eta 16s    "
+      "-mem-ctx-4k.pth      39%[======>             ]   1.04G  40.4MB/s    eta 42s    "
      ]
     },
     {
@@ -959,7 +1296,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "k.pth                53%[=========>          ]   1.40G  84.9MB/s    eta 16s    "
+      "mem-ctx-4k.pth       39%[======>             ]   1.04G  39.5MB/s    eta 41s    "
      ]
     },
     {
@@ -967,7 +1304,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      ".pth                 53%[=========>          ]   1.42G  84.1MB/s    eta 16s    "
+      "em-ctx-4k.pth        40%[=======>            ]   1.05G  40.4MB/s    eta 41s    "
      ]
     },
     {
@@ -975,7 +1312,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "pth                  54%[=========>          ]   1.43G  84.1MB/s    eta 15s    "
+      "m-ctx-4k.pth         40%[=======>            ]   1.06G  40.0MB/s    eta 41s    "
      ]
     },
     {
@@ -983,7 +1320,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "th                   55%[==========>         ]   1.45G  84.2MB/s    eta 15s    "
+      "-ctx-4k.pth          40%[=======>            ]   1.07G  40.5MB/s    eta 41s    "
      ]
     },
     {
@@ -991,7 +1328,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "h                    55%[==========>         ]   1.47G  84.2MB/s    eta 15s    "
+      "ctx-4k.pth           40%[=======>            ]   1.08G  40.1MB/s    eta 41s    "
      ]
     },
     {
@@ -999,7 +1336,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                     56%[==========>         ]   1.48G  84.1MB/s    eta 15s    "
+      "tx-4k.pth            41%[=======>            ]   1.09G  40.8MB/s    eta 40s    "
      ]
     },
     {
@@ -1007,7 +1344,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                  v  56%[==========>         ]   1.50G  83.5MB/s    eta 15s    "
+      "x-4k.pth             41%[=======>            ]   1.09G  40.6MB/s    eta 40s    "
      ]
     },
     {
@@ -1015,7 +1352,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                 v5  57%[==========>         ]   1.52G  83.5MB/s    eta 14s    "
+      "-4k.pth              41%[=======>            ]   1.10G  40.2MB/s    eta 40s    "
      ]
     },
     {
@@ -1023,7 +1360,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                v5-  58%[==========>         ]   1.53G  82.6MB/s    eta 14s    "
+      "4k.pth               42%[=======>            ]   1.11G  40.0MB/s    eta 40s    "
      ]
     },
     {
@@ -1031,7 +1368,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "               v5-L  58%[==========>         ]   1.55G  82.9MB/s    eta 14s    "
+      "k.pth                42%[=======>            ]   1.12G  39.8MB/s    eta 40s    "
      ]
     },
     {
@@ -1039,7 +1376,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "              v5-L9  59%[==========>         ]   1.56G  83.4MB/s    eta 14s    "
+      ".pth                 42%[=======>            ]   1.12G  39.2MB/s    eta 39s    "
      ]
     },
     {
@@ -1047,7 +1384,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "             v5-L96  60%[===========>        ]   1.58G  83.6MB/s    eta 14s    "
+      "pth                  43%[=======>            ]   1.13G  39.1MB/s    eta 39s    "
      ]
     },
     {
@@ -1055,7 +1392,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "            v5-L96-  60%[===========>        ]   1.59G  82.9MB/s    eta 13s    "
+      "th                   43%[=======>            ]   1.14G  38.9MB/s    eta 39s    "
      ]
     },
     {
@@ -1063,7 +1400,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "           v5-L96-D  60%[===========>        ]   1.60G  79.3MB/s    eta 13s    "
+      "h                    43%[=======>            ]   1.15G  39.3MB/s    eta 39s    "
      ]
     },
     {
@@ -1071,7 +1408,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "          v5-L96-D1  61%[===========>        ]   1.62G  79.4MB/s    eta 13s    "
+      "                     44%[=======>            ]   1.16G  38.9MB/s    eta 39s    "
      ]
     },
     {
@@ -1079,7 +1416,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "         v5-L96-D10  62%[===========>        ]   1.63G  78.9MB/s    eta 13s    "
+      "                  v  44%[=======>            ]   1.17G  39.4MB/s    eta 38s    "
      ]
     },
     {
@@ -1087,7 +1424,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "        v5-L96-D102  62%[===========>        ]   1.65G  78.9MB/s    eta 13s    "
+      "                 v5  44%[=======>            ]   1.17G  39.8MB/s    eta 38s    "
      ]
     },
     {
@@ -1095,7 +1432,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "       v5-L96-D1024  63%[===========>        ]   1.67G  78.8MB/s    eta 12s    "
+      "                v5-  44%[=======>            ]   1.18G  39.8MB/s    eta 38s    "
      ]
     },
     {
@@ -1103,7 +1440,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "      v5-L96-D1024-  63%[===========>        ]   1.68G  79.5MB/s    eta 12s    "
+      "               v5-L  45%[========>           ]   1.19G  39.6MB/s    eta 38s    "
      ]
     },
     {
@@ -1111,7 +1448,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "     v5-L96-D1024-E  64%[===========>        ]   1.70G  79.1MB/s    eta 12s    "
+      "              v5-L9  45%[========>           ]   1.20G  39.7MB/s    eta 38s    "
      ]
     },
     {
@@ -1119,7 +1456,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "    v5-L96-D1024-E0  65%[============>       ]   1.72G  78.9MB/s    eta 12s    "
+      "             v5-L96  45%[========>           ]   1.21G  39.9MB/s    eta 37s    "
      ]
     },
     {
@@ -1127,7 +1464,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "   v5-L96-D1024-E0_  65%[============>       ]   1.73G  79.4MB/s    eta 12s    "
+      "            v5-L96-  46%[========>           ]   1.22G  39.7MB/s    eta 37s    "
      ]
     },
     {
@@ -1135,7 +1472,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "  v5-L96-D1024-E0_1  66%[============>       ]   1.75G  79.3MB/s    eta 11s    "
+      "           v5-L96-D  46%[========>           ]   1.23G  39.2MB/s    eta 37s    "
      ]
     },
     {
@@ -1143,7 +1480,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      " v5-L96-D1024-E0_1-  67%[============>       ]   1.77G  79.3MB/s    eta 11s    "
+      "          v5-L96-D1  46%[========>           ]   1.23G  39.5MB/s    eta 37s    "
      ]
     },
     {
@@ -1151,7 +1488,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "v5-L96-D1024-E0_1-m  67%[============>       ]   1.78G  79.4MB/s    eta 11s    "
+      "         v5-L96-D10  47%[========>           ]   1.24G  39.9MB/s    eta 37s    "
      ]
     },
     {
@@ -1159,7 +1496,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "5-L96-D1024-E0_1-me  68%[============>       ]   1.80G  79.6MB/s    eta 11s    "
+      "        v5-L96-D102  47%[========>           ]   1.25G  39.6MB/s    eta 36s    "
      ]
     },
     {
@@ -1167,7 +1504,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-L96-D1024-E0_1-mem  68%[============>       ]   1.81G  79.1MB/s    eta 11s    "
+      "       v5-L96-D1024  47%[========>           ]   1.26G  39.9MB/s    eta 36s    "
      ]
     },
     {
@@ -1175,7 +1512,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "L96-D1024-E0_1-mem-  69%[============>       ]   1.83G  79.1MB/s    eta 10s    "
+      "      v5-L96-D1024-  48%[========>           ]   1.27G  40.6MB/s    eta 36s    "
      ]
     },
     {
@@ -1183,7 +1520,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "96-D1024-E0_1-mem-c  70%[=============>      ]   1.85G  78.6MB/s    eta 10s    "
+      "     v5-L96-D1024-E  48%[========>           ]   1.27G  40.1MB/s    eta 36s    "
      ]
     },
     {
@@ -1191,7 +1528,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "6-D1024-E0_1-mem-ct  70%[=============>      ]   1.86G  82.6MB/s    eta 10s    "
+      "    v5-L96-D1024-E0  48%[========>           ]   1.28G  40.5MB/s    eta 36s    "
      ]
     },
     {
@@ -1199,7 +1536,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-D1024-E0_1-mem-ctx  71%[=============>      ]   1.88G  83.3MB/s    eta 10s    "
+      "   v5-L96-D1024-E0_  49%[========>           ]   1.29G  40.5MB/s    eta 35s    "
      ]
     },
     {
@@ -1207,7 +1544,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "D1024-E0_1-mem-ctx-  71%[=============>      ]   1.89G  82.3MB/s    eta 10s    "
+      "  v5-L96-D1024-E0_1  49%[========>           ]   1.30G  40.6MB/s    eta 35s    "
      ]
     },
     {
@@ -1215,7 +1552,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "1024-E0_1-mem-ctx-4  72%[=============>      ]   1.90G  78.4MB/s    eta 9s     "
+      " v5-L96-D1024-E0_1-  49%[========>           ]   1.31G  40.0MB/s    eta 35s    "
      ]
     },
     {
@@ -1223,7 +1560,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "024-E0_1-mem-ctx-4k  72%[=============>      ]   1.91G  76.3MB/s    eta 9s     "
+      "v5-L96-D1024-E0_1-m  49%[========>           ]   1.31G  37.9MB/s    eta 35s    "
      ]
     },
     {
@@ -1231,7 +1568,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "24-E0_1-mem-ctx-4k.  73%[=============>      ]   1.93G  76.6MB/s    eta 9s     "
+      "5-L96-D1024-E0_1-me  49%[========>           ]   1.31G  37.3MB/s    eta 35s    "
      ]
     },
     {
@@ -1239,7 +1576,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "4-E0_1-mem-ctx-4k.p  73%[=============>      ]   1.94G  76.0MB/s    eta 9s     "
+      "-L96-D1024-E0_1-mem  50%[=========>          ]   1.33G  39.3MB/s    eta 34s    "
      ]
     },
     {
@@ -1247,7 +1584,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-E0_1-mem-ctx-4k.pt  74%[=============>      ]   1.96G  74.8MB/s    eta 9s     "
+      "L96-D1024-E0_1-mem-  50%[=========>          ]   1.34G  38.9MB/s    eta 34s    "
      ]
     },
     {
@@ -1255,7 +1592,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "E0_1-mem-ctx-4k.pth  74%[=============>      ]   1.97G  74.9MB/s    eta 8s     "
+      "96-D1024-E0_1-mem-c  51%[=========>          ]   1.34G  38.8MB/s    eta 34s    "
      ]
     },
     {
@@ -1263,7 +1600,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "0_1-mem-ctx-4k.pth   75%[==============>     ]   1.99G  73.7MB/s    eta 8s     "
+      "6-D1024-E0_1-mem-ct  51%[=========>          ]   1.35G  39.0MB/s    eta 34s    "
      ]
     },
     {
@@ -1271,7 +1608,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "_1-mem-ctx-4k.pth    76%[==============>     ]   2.01G  74.2MB/s    eta 8s     "
+      "-D1024-E0_1-mem-ctx  51%[=========>          ]   1.36G  39.1MB/s    eta 34s    "
      ]
     },
     {
@@ -1279,7 +1616,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "1-mem-ctx-4k.pth     76%[==============>     ]   2.02G  74.1MB/s    eta 8s     "
+      "D1024-E0_1-mem-ctx-  52%[=========>          ]   1.37G  38.5MB/s    eta 33s    "
      ]
     },
     {
@@ -1287,7 +1624,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-mem-ctx-4k.pth      77%[==============>     ]   2.04G  75.3MB/s    eta 8s     "
+      "1024-E0_1-mem-ctx-4  52%[=========>          ]   1.38G  39.2MB/s    eta 33s    "
      ]
     },
     {
@@ -1295,7 +1632,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "mem-ctx-4k.pth       78%[==============>     ]   2.06G  74.3MB/s    eta 7s     "
+      "024-E0_1-mem-ctx-4k  52%[=========>          ]   1.39G  39.4MB/s    eta 33s    "
      ]
     },
     {
@@ -1303,7 +1640,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "em-ctx-4k.pth        78%[==============>     ]   2.07G  75.5MB/s    eta 7s     "
+      "24-E0_1-mem-ctx-4k.  53%[=========>          ]   1.40G  39.5MB/s    eta 33s    "
      ]
     },
     {
@@ -1311,7 +1648,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "m-ctx-4k.pth         79%[==============>     ]   2.09G  74.4MB/s    eta 7s     "
+      "4-E0_1-mem-ctx-4k.p  53%[=========>          ]   1.40G  39.4MB/s    eta 33s    "
      ]
     },
     {
@@ -1319,7 +1656,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-ctx-4k.pth          80%[===============>    ]   2.11G  74.8MB/s    eta 7s     "
+      "-E0_1-mem-ctx-4k.pt  53%[=========>          ]   1.41G  39.4MB/s    eta 32s    "
      ]
     },
     {
@@ -1327,7 +1664,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "ctx-4k.pth           80%[===============>    ]   2.12G  75.1MB/s    eta 7s     "
+      "E0_1-mem-ctx-4k.pth  53%[=========>          ]   1.42G  39.0MB/s    eta 32s    "
      ]
     },
     {
@@ -1335,7 +1672,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "tx-4k.pth            81%[===============>    ]   2.14G  77.3MB/s    eta 6s     "
+      "0_1-mem-ctx-4k.pth   54%[=========>          ]   1.43G  40.3MB/s    eta 32s    "
      ]
     },
     {
@@ -1343,7 +1680,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "x-4k.pth             81%[===============>    ]   2.16G  81.6MB/s    eta 6s     "
+      "_1-mem-ctx-4k.pth    54%[=========>          ]   1.44G  40.6MB/s    eta 32s    "
      ]
     },
     {
@@ -1351,7 +1688,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "-4k.pth              82%[===============>    ]   2.17G  81.8MB/s    eta 6s     "
+      "1-mem-ctx-4k.pth     54%[=========>          ]   1.44G  42.8MB/s    eta 32s    "
      ]
     },
     {
@@ -1359,7 +1696,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "4k.pth               83%[===============>    ]   2.19G  82.7MB/s    eta 6s     "
+      "-mem-ctx-4k.pth      55%[==========>         ]   1.45G  42.5MB/s    eta 31s    "
      ]
     },
     {
@@ -1367,7 +1704,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "k.pth                83%[===============>    ]   2.21G  83.5MB/s    eta 6s     "
+      "mem-ctx-4k.pth       55%[==========>         ]   1.46G  40.5MB/s    eta 31s    "
      ]
     },
     {
@@ -1375,7 +1712,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      ".pth                 84%[===============>    ]   2.22G  82.8MB/s    eta 5s     "
+      "em-ctx-4k.pth        55%[==========>         ]   1.47G  40.7MB/s    eta 31s    "
      ]
     },
     {
@@ -1383,7 +1720,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "pth                  85%[================>   ]   2.24G  82.6MB/s    eta 5s     "
+      "m-ctx-4k.pth         56%[==========>         ]   1.48G  40.5MB/s    eta 31s    "
      ]
     },
     {
@@ -1391,7 +1728,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "th                   85%[================>   ]   2.26G  84.6MB/s    eta 5s     "
+      "-ctx-4k.pth          56%[==========>         ]   1.48G  39.5MB/s    eta 31s    "
      ]
     },
     {
@@ -1399,7 +1736,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "h                    86%[================>   ]   2.28G  84.4MB/s    eta 5s     "
+      "ctx-4k.pth           56%[==========>         ]   1.49G  39.4MB/s    eta 30s    "
      ]
     },
     {
@@ -1407,7 +1744,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                     87%[================>   ]   2.29G  84.7MB/s    eta 5s     "
+      "tx-4k.pth            57%[==========>         ]   1.50G  40.0MB/s    eta 30s    "
      ]
     },
     {
@@ -1415,7 +1752,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                  v  87%[================>   ]   2.31G  84.6MB/s    eta 4s     "
+      "x-4k.pth             57%[==========>         ]   1.51G  39.8MB/s    eta 30s    "
      ]
     },
     {
@@ -1423,7 +1760,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                 v5  88%[================>   ]   2.33G  84.7MB/s    eta 4s     "
+      "-4k.pth              57%[==========>         ]   1.52G  39.9MB/s    eta 30s    "
      ]
     },
     {
@@ -1431,7 +1768,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "                v5-  88%[================>   ]   2.34G  84.0MB/s    eta 4s     "
+      "4k.pth               57%[==========>         ]   1.53G  39.6MB/s    eta 30s    "
      ]
     },
     {
@@ -1439,7 +1776,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "               v5-L  89%[================>   ]   2.36G  85.4MB/s    eta 4s     "
+      "k.pth                58%[==========>         ]   1.54G  40.3MB/s    eta 28s    "
      ]
     },
     {
@@ -1447,7 +1784,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "              v5-L9  90%[=================>  ]   2.37G  84.9MB/s    eta 4s     "
+      ".pth                 58%[==========>         ]   1.54G  39.8MB/s    eta 28s    "
      ]
     },
     {
@@ -1455,7 +1792,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "             v5-L96  90%[=================>  ]   2.39G  85.9MB/s    eta 3s     "
+      "pth                  58%[==========>         ]   1.55G  40.0MB/s    eta 28s    "
      ]
     },
     {
@@ -1463,7 +1800,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "            v5-L96-  91%[=================>  ]   2.41G  85.4MB/s    eta 3s     "
+      "th                   59%[==========>         ]   1.56G  39.6MB/s    eta 28s    "
      ]
     },
     {
@@ -1471,7 +1808,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "           v5-L96-D  92%[=================>  ]   2.42G  85.3MB/s    eta 3s     "
+      "h                    59%[==========>         ]   1.57G  39.9MB/s    eta 28s    "
      ]
     },
     {
@@ -1479,7 +1816,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "          v5-L96-D1  92%[=================>  ]   2.44G  84.5MB/s    eta 3s     "
+      "                     59%[==========>         ]   1.58G  39.5MB/s    eta 27s    "
      ]
     },
     {
@@ -1487,7 +1824,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "         v5-L96-D10  93%[=================>  ]   2.46G  85.1MB/s    eta 3s     "
+      "                  v  60%[===========>        ]   1.58G  39.8MB/s    eta 27s    "
      ]
     },
     {
@@ -1495,7 +1832,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "        v5-L96-D102  94%[=================>  ]   2.47G  84.5MB/s    eta 2s     "
+      "                 v5  60%[===========>        ]   1.59G  39.8MB/s    eta 27s    "
      ]
     },
     {
@@ -1503,7 +1840,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "       v5-L96-D1024  94%[=================>  ]   2.49G  84.9MB/s    eta 2s     "
+      "                v5-  60%[===========>        ]   1.60G  39.9MB/s    eta 27s    "
      ]
     },
     {
@@ -1511,7 +1848,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "      v5-L96-D1024-  95%[==================> ]   2.51G  85.3MB/s    eta 2s     "
+      "               v5-L  61%[===========>        ]   1.61G  40.0MB/s    eta 27s    "
      ]
     },
     {
@@ -1519,7 +1856,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "     v5-L96-D1024-E  95%[==================> ]   2.53G  84.6MB/s    eta 2s     "
+      "              v5-L9  61%[===========>        ]   1.62G  40.5MB/s    eta 26s    "
      ]
     },
     {
@@ -1527,7 +1864,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "    v5-L96-D1024-E0  96%[==================> ]   2.54G  85.4MB/s    eta 2s     "
+      "             v5-L96  61%[===========>        ]   1.63G  40.7MB/s    eta 26s    "
      ]
     },
     {
@@ -1535,7 +1872,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "   v5-L96-D1024-E0_  97%[==================> ]   2.56G  84.5MB/s    eta 1s     "
+      "            v5-L96-  62%[===========>        ]   1.63G  40.6MB/s    eta 26s    "
      ]
     },
     {
@@ -1543,7 +1880,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "  v5-L96-D1024-E0_1  97%[==================> ]   2.58G  84.1MB/s    eta 1s     "
+      "           v5-L96-D  62%[===========>        ]   1.64G  41.2MB/s    eta 26s    "
      ]
     },
     {
@@ -1551,7 +1888,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      " v5-L96-D1024-E0_1-  98%[==================> ]   2.60G  85.3MB/s    eta 1s     "
+      "          v5-L96-D1  62%[===========>        ]   1.65G  41.0MB/s    eta 26s    "
      ]
     },
     {
@@ -1559,7 +1896,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "v5-L96-D1024-E0_1-m  99%[==================> ]   2.61G  85.9MB/s    eta 1s     "
+      "         v5-L96-D10  63%[===========>        ]   1.66G  40.7MB/s    eta 25s    "
      ]
     },
     {
@@ -1567,100348 +1904,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "5-L96-D1024-E0_1-me  99%[==================> ]   2.63G  84.7MB/s    eta 1s     \r",
-      "v5-L96-D1024-E0_1-m 100%[===================>]   2.63G  84.9MB/s    in 33s     \r\n",
-      "\r\n",
-      "2023-09-02 08:41:46 (80.6 MB/s) - ‘v5-L96-D1024-E0_1-mem-ctx-4k.pth’ saved [2825976699/2825976699]\r\n",
-      "\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "total 2.7G\r\n",
-      "drwxr-xr-x  2 root root   54 Sep  2 08:41 .\r\n",
-      "drwxr-xr-x 19 root root 4.0K Sep  2 08:41 ..\r\n",
-      "-rw-r--r--  1 root root 2.7G Sep  2 05:37 v5-L96-D1024-E0_1-mem-ctx-4k.pth\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Download the model directly (stop gap till HF sync issues is resolved)\n",
-    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
-    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/{FILENAME_PREFIX}-mem-ctx-4k.pth\"\n",
-    "\n",
-    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
-    "    ls -alh ."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "eafbdbb6",
-   "metadata": {
-    "papermill": {
-     "duration": 0.010117,
-     "end_time": "2023-09-02T08:41:46.820211",
-     "exception": false,
-     "start_time": "2023-09-02T08:41:46.810094",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "source": [
-    "## Tune 6 : Ramping up the ctx size (8192), memory training\n",
-    "\n",
-    "- Tune 6: Large ctx size (8192), Scaling up!"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "7a3deb9d",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-02T08:41:46.844105Z",
-     "iopub.status.busy": "2023-09-02T08:41:46.843909Z",
-     "iopub.status.idle": "2023-09-02T08:41:53.856857Z",
-     "shell.execute_reply": "2023-09-02T08:41:53.856017Z"
-    },
-    "papermill": {
-     "duration": 7.084423,
-     "end_time": "2023-09-02T08:41:53.914741",
-     "exception": false,
-     "start_time": "2023-09-02T08:41:46.830318",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "## Generating word reptition dataset ##\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 40 max words, 50 samples - at ../dataset/gen-word-40-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 130 samples (1 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 45 max words, 50 samples - at ../dataset/gen-word-45-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 25 max words, 50 samples - at ../dataset/gen-word-25-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5 max words, 50 samples - at ../dataset/gen-word-5-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 89 samples (1 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 30 max words, 50 samples - at ../dataset/gen-word-30-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 50 max words, 50 samples - at ../dataset/gen-word-50-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 179 samples (1 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 65 max words, 50 samples - at ../dataset/gen-word-65-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 34 samples (1 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 75 samples (1 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 60 max words, 50 samples - at ../dataset/gen-word-60-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 263 samples (1 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 20 max words, 50 samples - at ../dataset/gen-word-20-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 103 samples (1 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 55 max words, 50 samples - at ../dataset/gen-word-55-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 61 samples (1 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 38 samples (1 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 554 samples (1 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 55 samples (1 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 95 max words, 50 samples - at ../dataset/gen-word-95-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 43 samples (1 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 70 max words, 50 samples - at ../dataset/gen-word-70-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 175 max words, 50 samples - at ../dataset/gen-word-175-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 75 max words, 50 samples - at ../dataset/gen-word-75-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 48 samples (1 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 29 samples (1 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 17 samples (1 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 29 samples (1 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 25 samples (1 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 15 max words, 50 samples - at ../dataset/gen-word-15-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2 max words, 50 samples - at ../dataset/word-2-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 110 max words, 50 samples - at ../dataset/gen-word-110-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 90 max words, 50 samples - at ../dataset/gen-word-90-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 85 max words, 50 samples - at ../dataset/gen-word-85-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 68 samples (1 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 34 samples (1 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 130 max words, 50 samples - at ../dataset/gen-word-130-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 125 max words, 50 samples - at ../dataset/gen-word-125-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 140 max words, 50 samples - at ../dataset/gen-word-140-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 21 samples (1 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 10 max words, 50 samples - at ../dataset/gen-word-10-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 18 samples (1 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 335 max words, 50 samples - at ../dataset/gen-word-335-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (1 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 16 samples (1 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 180 max words, 50 samples - at ../dataset/gen-word-180-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 16 samples (1 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 120 max words, 50 samples - at ../dataset/gen-word-120-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 115 max words, 50 samples - at ../dataset/gen-word-115-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 195 max words, 50 samples - at ../dataset/gen-word-195-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 35 max words, 50 samples - at ../dataset/gen-word-35-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 18 samples (1 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 21 samples (1 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 190 max words, 50 samples - at ../dataset/gen-word-190-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 215 max words, 50 samples - at ../dataset/gen-word-215-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 370 max words, 50 samples - at ../dataset/gen-word-370-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 41 samples (1 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 270 max words, 50 samples - at ../dataset/gen-word-270-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 450 max words, 50 samples - at ../dataset/gen-word-450-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 13 samples (1 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 275 max words, 50 samples - at ../dataset/gen-word-275-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 350 max words, 50 samples - at ../dataset/gen-word-350-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 535 max words, 50 samples - at ../dataset/gen-word-535-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 100 max words, 50 samples - at ../dataset/gen-word-100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 33 samples (1 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 210 max words, 50 samples - at ../dataset/gen-word-210-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 155 max words, 50 samples - at ../dataset/gen-word-155-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 13 samples (1 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 145 max words, 50 samples - at ../dataset/gen-word-145-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 355 max words, 50 samples - at ../dataset/gen-word-355-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 17 samples (1 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 240 max words, 50 samples - at ../dataset/gen-word-240-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 330 max words, 50 samples - at ../dataset/gen-word-330-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 185 max words, 50 samples - at ../dataset/gen-word-185-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 570 max words - at ../dataset/shuffle-word-570-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 265 max words, 50 samples - at ../dataset/gen-word-265-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 225 max words, 50 samples - at ../dataset/gen-word-225-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 430 max words, 50 samples - at ../dataset/gen-word-430-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 11 samples (1 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 460 max words, 50 samples - at ../dataset/gen-word-460-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 465 max words, 50 samples - at ../dataset/gen-word-465-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 400 max words, 50 samples - at ../dataset/gen-word-400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 415 max words, 50 samples - at ../dataset/gen-word-415-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 960 max words - at ../dataset/shuffle-word-960-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 160 max words, 50 samples - at ../dataset/gen-word-160-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 825 max words - at ../dataset/shuffle-word-825-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 915 max words - at ../dataset/shuffle-word-915-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 710 max words - at ../dataset/shuffle-word-710-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 710 max words, 50 samples - at ../dataset/gen-word-710-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 585 max words - at ../dataset/shuffle-word-585-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 850 max words, 50 samples - at ../dataset/gen-word-850-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 385 max words, 50 samples - at ../dataset/gen-word-385-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 955 max words - at ../dataset/shuffle-word-955-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 755 max words, 50 samples - at ../dataset/gen-word-755-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 800 max words, 50 samples - at ../dataset/gen-word-800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 830 max words - at ../dataset/shuffle-word-830-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 965 max words - at ../dataset/shuffle-word-965-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 910 max words - at ../dataset/shuffle-word-910-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 885 max words - at ../dataset/shuffle-word-885-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 590 max words, 50 samples - at ../dataset/gen-word-590-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 750 max words, 50 samples - at ../dataset/gen-word-750-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 635 max words, 50 samples - at ../dataset/gen-word-635-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 790 max words, 50 samples - at ../dataset/gen-word-790-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 940 max words, 50 samples - at ../dataset/gen-word-940-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 820 max words - at ../dataset/shuffle-word-820-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 935 max words, 50 samples - at ../dataset/gen-word-935-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 13 samples (1 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 975 max words - at ../dataset/shuffle-word-975-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 250 max words, 50 samples - at ../dataset/gen-word-250-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 685 max words - at ../dataset/shuffle-word-685-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 105 max words, 50 samples - at ../dataset/gen-word-105-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 205 max words, 50 samples - at ../dataset/gen-word-205-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 80 max words, 50 samples - at ../dataset/gen-word-80-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 390 max words, 50 samples - at ../dataset/gen-word-390-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 830 max words, 50 samples - at ../dataset/gen-word-830-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 420 max words, 50 samples - at ../dataset/gen-word-420-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 405 max words, 50 samples - at ../dataset/gen-word-405-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 490 max words, 50 samples - at ../dataset/gen-word-490-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 865 max words - at ../dataset/shuffle-word-865-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 875 max words - at ../dataset/shuffle-word-875-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 860 max words, 50 samples - at ../dataset/gen-word-860-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 15 samples (1 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 905 max words - at ../dataset/shuffle-word-905-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 765 max words - at ../dataset/shuffle-word-765-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 8 samples (1 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 985 max words - at ../dataset/shuffle-word-985-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 655 max words - at ../dataset/shuffle-word-655-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 815 max words, 50 samples - at ../dataset/gen-word-815-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 750 max words - at ../dataset/shuffle-word-750-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 950 max words - at ../dataset/shuffle-word-950-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 760 max words - at ../dataset/shuffle-word-760-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 14 samples (1 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 37 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 890 max words - at ../dataset/shuffle-word-890-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 320 max words, 50 samples - at ../dataset/gen-word-320-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 995 max words - at ../dataset/shuffle-word-995-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 670 max words - at ../dataset/shuffle-word-670-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 990 max words - at ../dataset/shuffle-word-990-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 870 max words - at ../dataset/shuffle-word-870-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 900 max words, 50 samples - at ../dataset/gen-word-900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 595 max words - at ../dataset/shuffle-word-595-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 480 max words, 50 samples - at ../dataset/gen-word-480-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 980 max words - at ../dataset/shuffle-word-980-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 235 max words, 50 samples - at ../dataset/gen-word-235-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 610 max words, 50 samples - at ../dataset/gen-word-610-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 150 max words, 50 samples - at ../dataset/gen-word-150-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 200 max words, 50 samples - at ../dataset/gen-word-200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 970 max words - at ../dataset/shuffle-word-970-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 895 max words - at ../dataset/shuffle-word-895-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 440 max words, 50 samples - at ../dataset/gen-word-440-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 955 max words, 50 samples - at ../dataset/gen-word-955-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 280 max words, 50 samples - at ../dataset/gen-word-280-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 920 max words, 50 samples - at ../dataset/gen-word-920-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 600 max words, 50 samples - at ../dataset/gen-word-600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 445 max words, 50 samples - at ../dataset/gen-word-445-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 510 max words, 50 samples - at ../dataset/gen-word-510-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 895 max words, 50 samples - at ../dataset/gen-word-895-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 630 max words - at ../dataset/shuffle-word-630-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 860 max words - at ../dataset/shuffle-word-860-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 935 max words - at ../dataset/shuffle-word-935-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 625 max words - at ../dataset/shuffle-word-625-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 905 max words, 50 samples - at ../dataset/gen-word-905-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 290 max words, 50 samples - at ../dataset/gen-word-290-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 325 max words, 50 samples - at ../dataset/gen-word-325-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 560 max words - at ../dataset/shuffle-word-560-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 640 max words - at ../dataset/shuffle-word-640-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 875 max words, 50 samples - at ../dataset/gen-word-875-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 720 max words - at ../dataset/shuffle-word-720-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 340 max words, 50 samples - at ../dataset/gen-word-340-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 705 max words - at ../dataset/shuffle-word-705-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 865 max words, 50 samples - at ../dataset/gen-word-865-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 310 max words, 50 samples - at ../dataset/gen-word-310-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 425 max words, 50 samples - at ../dataset/gen-word-425-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 990 max words, 50 samples - at ../dataset/gen-word-990-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 580 max words - at ../dataset/shuffle-word-580-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 820 max words, 50 samples - at ../dataset/gen-word-820-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 565 max words - at ../dataset/shuffle-word-565-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 730 max words - at ../dataset/shuffle-word-730-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 795 max words, 50 samples - at ../dataset/gen-word-795-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 925 max words - at ../dataset/shuffle-word-925-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 615 max words - at ../dataset/shuffle-word-615-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 720 max words, 50 samples - at ../dataset/gen-word-720-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 805 max words, 50 samples - at ../dataset/gen-word-805-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 590 max words - at ../dataset/shuffle-word-590-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 260 max words, 50 samples - at ../dataset/gen-word-260-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 835 max words - at ../dataset/shuffle-word-835-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 375 max words, 50 samples - at ../dataset/gen-word-375-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 29 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 495 max words, 50 samples - at ../dataset/gen-word-495-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 380 max words, 50 samples - at ../dataset/gen-word-380-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 565 max words, 50 samples - at ../dataset/gen-word-565-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 745 max words, 50 samples - at ../dataset/gen-word-745-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 785 max words - at ../dataset/shuffle-word-785-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 475 max words, 50 samples - at ../dataset/gen-word-475-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 135 max words, 50 samples - at ../dataset/gen-word-135-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 775 max words, 50 samples - at ../dataset/gen-word-775-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 470 max words, 50 samples - at ../dataset/gen-word-470-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 500 max words, 50 samples - at ../dataset/gen-word-500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 885 max words, 50 samples - at ../dataset/gen-word-885-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 525 max words, 50 samples - at ../dataset/gen-word-525-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 165 max words, 50 samples - at ../dataset/gen-word-165-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 770 max words, 50 samples - at ../dataset/gen-word-770-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 625 max words, 50 samples - at ../dataset/gen-word-625-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 540 max words, 50 samples - at ../dataset/gen-word-540-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 345 max words, 50 samples - at ../dataset/gen-word-345-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 665 max words, 50 samples - at ../dataset/gen-word-665-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 170 max words, 50 samples - at ../dataset/gen-word-170-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 640 max words, 50 samples - at ../dataset/gen-word-640-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 51 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 985 max words, 50 samples - at ../dataset/gen-word-985-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 395 max words, 50 samples - at ../dataset/gen-word-395-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 975 max words, 50 samples - at ../dataset/gen-word-975-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 315 max words, 50 samples - at ../dataset/gen-word-315-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 655 max words, 50 samples - at ../dataset/gen-word-655-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 555 max words, 50 samples - at ../dataset/gen-word-555-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 715 max words - at ../dataset/shuffle-word-715-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 11 samples (1 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 220 max words, 50 samples - at ../dataset/gen-word-220-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 230 max words, 50 samples - at ../dataset/gen-word-230-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 780 max words - at ../dataset/shuffle-word-780-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 850 max words - at ../dataset/shuffle-word-850-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 435 max words, 50 samples - at ../dataset/gen-word-435-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 690 max words - at ../dataset/shuffle-word-690-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 645 max words - at ../dataset/shuffle-word-645-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 580 max words, 50 samples - at ../dataset/gen-word-580-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 285 max words, 50 samples - at ../dataset/gen-word-285-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 595 max words, 50 samples - at ../dataset/gen-word-595-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 695 max words, 50 samples - at ../dataset/gen-word-695-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 735 max words - at ../dataset/shuffle-word-735-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 255 max words, 50 samples - at ../dataset/gen-word-255-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 855 max words, 50 samples - at ../dataset/gen-word-855-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 610 max words - at ../dataset/shuffle-word-610-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 945 max words - at ../dataset/shuffle-word-945-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 530 max words, 50 samples - at ../dataset/gen-word-530-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 545 max words, 50 samples - at ../dataset/gen-word-545-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 690 max words, 50 samples - at ../dataset/gen-word-690-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 615 max words, 50 samples - at ../dataset/gen-word-615-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 620 max words - at ../dataset/shuffle-word-620-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 575 max words, 50 samples - at ../dataset/gen-word-575-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 7 samples (1 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 930 max words - at ../dataset/shuffle-word-930-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 845 max words, 50 samples - at ../dataset/gen-word-845-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 725 max words - at ../dataset/shuffle-word-725-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 23 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 735 max words, 50 samples - at ../dataset/gen-word-735-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 745 max words - at ../dataset/shuffle-word-745-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 695 max words - at ../dataset/shuffle-word-695-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 880 max words - at ../dataset/shuffle-word-880-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 605 max words - at ../dataset/shuffle-word-605-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 915 max words, 50 samples - at ../dataset/gen-word-915-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 585 max words, 50 samples - at ../dataset/gen-word-585-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 675 max words - at ../dataset/shuffle-word-675-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 815 max words - at ../dataset/shuffle-word-815-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 715 max words, 50 samples - at ../dataset/gen-word-715-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 515 max words, 50 samples - at ../dataset/gen-word-515-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 645 max words, 50 samples - at ../dataset/gen-word-645-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 555 max words - at ../dataset/shuffle-word-555-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 635 max words - at ../dataset/shuffle-word-635-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 9 samples (1 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 940 max words - at ../dataset/shuffle-word-940-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 485 max words, 50 samples - at ../dataset/gen-word-485-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 305 max words, 50 samples - at ../dataset/gen-word-305-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 840 max words - at ../dataset/shuffle-word-840-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 770 max words - at ../dataset/shuffle-word-770-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 780 max words, 50 samples - at ../dataset/gen-word-780-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 950 max words, 50 samples - at ../dataset/gen-word-950-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 755 max words - at ../dataset/shuffle-word-755-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 660 max words - at ../dataset/shuffle-word-660-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 650 max words, 50 samples - at ../dataset/gen-word-650-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 10 samples (1 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 725 max words, 50 samples - at ../dataset/gen-word-725-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 575 max words - at ../dataset/shuffle-word-575-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 845 max words - at ../dataset/shuffle-word-845-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 700 max words, 50 samples - at ../dataset/gen-word-700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 880 max words, 50 samples - at ../dataset/gen-word-880-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 505 max words, 50 samples - at ../dataset/gen-word-505-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 910 max words, 50 samples - at ../dataset/gen-word-910-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 300 max words, 50 samples - at ../dataset/gen-word-300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 705 max words, 50 samples - at ../dataset/gen-word-705-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 665 max words - at ../dataset/shuffle-word-665-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 365 max words, 50 samples - at ../dataset/gen-word-365-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 775 max words - at ../dataset/shuffle-word-775-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 965 max words, 50 samples - at ../dataset/gen-word-965-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 23 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 810 max words, 50 samples - at ../dataset/gen-word-810-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 790 max words - at ../dataset/shuffle-word-790-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 605 max words, 50 samples - at ../dataset/gen-word-605-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 560 max words, 50 samples - at ../dataset/gen-word-560-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 410 max words, 50 samples - at ../dataset/gen-word-410-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 520 max words, 50 samples - at ../dataset/gen-word-520-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 41 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 660 max words, 50 samples - at ../dataset/gen-word-660-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 295 max words, 50 samples - at ../dataset/gen-word-295-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 360 max words, 50 samples - at ../dataset/gen-word-360-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 840 max words, 50 samples - at ../dataset/gen-word-840-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 945 max words, 50 samples - at ../dataset/gen-word-945-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 680 max words, 50 samples - at ../dataset/gen-word-680-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 925 max words, 50 samples - at ../dataset/gen-word-925-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 550 max words, 50 samples - at ../dataset/gen-word-550-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 765 max words, 50 samples - at ../dataset/gen-word-765-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 685 max words, 50 samples - at ../dataset/gen-word-685-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 650 max words - at ../dataset/shuffle-word-650-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 980 max words, 50 samples - at ../dataset/gen-word-980-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 5 samples (1 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 680 max words - at ../dataset/shuffle-word-680-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 855 max words - at ../dataset/shuffle-word-855-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 570 max words, 50 samples - at ../dataset/gen-word-570-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 740 max words, 50 samples - at ../dataset/gen-word-740-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 740 max words - at ../dataset/shuffle-word-740-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 630 max words, 50 samples - at ../dataset/gen-word-630-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 675 max words, 50 samples - at ../dataset/gen-word-675-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 245 max words, 50 samples - at ../dataset/gen-word-245-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 785 max words, 50 samples - at ../dataset/gen-word-785-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 810 max words - at ../dataset/shuffle-word-810-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 960 max words, 50 samples - at ../dataset/gen-word-960-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 4 samples (1 token repeat) - 795 max words - at ../dataset/shuffle-word-795-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 920 max words - at ../dataset/shuffle-word-920-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 455 max words, 50 samples - at ../dataset/gen-word-455-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 890 max words, 50 samples - at ../dataset/gen-word-890-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1000 max words, 50 samples - at ../dataset/gen-word-1000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 835 max words, 50 samples - at ../dataset/gen-word-835-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 760 max words, 50 samples - at ../dataset/gen-word-760-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 870 max words, 50 samples - at ../dataset/gen-word-870-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 825 max words, 50 samples - at ../dataset/gen-word-825-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 930 max words, 50 samples - at ../dataset/gen-word-930-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 730 max words, 50 samples - at ../dataset/gen-word-730-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 6 samples (1 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 995 max words, 50 samples - at ../dataset/gen-word-995-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 60 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 3 samples (1 token repeat) - 805 max words - at ../dataset/shuffle-word-805-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 620 max words, 50 samples - at ../dataset/gen-word-620-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 970 max words, 50 samples - at ../dataset/gen-word-970-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 670 max words, 50 samples - at ../dataset/gen-word-670-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "## Done ##\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "total 6.1G\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  10K Sep  2 08:41 gen-word-10-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  54K Sep  2 08:41 gen-word-100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 494K Sep  2 08:41 gen-word-1000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  56K Sep  2 08:41 gen-word-105-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  60K Sep  2 08:41 gen-word-110-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  22M Sep  2 08:41 gen-word-1100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  64K Sep  2 08:41 gen-word-115-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  63K Sep  2 08:41 gen-word-120-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  23M Sep  2 08:41 gen-word-1200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  69K Sep  2 08:41 gen-word-125-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  71K Sep  2 08:41 gen-word-130-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  25M Sep  2 08:41 gen-word-1300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  68K Sep  2 08:41 gen-word-135-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  70K Sep  2 08:41 gen-word-140-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27M Sep  2 08:41 gen-word-1400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  78K Sep  2 08:41 gen-word-145-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  13K Sep  2 08:41 gen-word-15-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  79K Sep  2 08:41 gen-word-150-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  29M Sep  2 08:41 gen-word-1500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  82K Sep  2 08:41 gen-word-155-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  83K Sep  2 08:41 gen-word-160-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  31M Sep  2 08:41 gen-word-1600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  86K Sep  2 08:41 gen-word-165-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  85K Sep  2 08:41 gen-word-170-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  33M Sep  2 08:41 gen-word-1700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  93K Sep  2 08:41 gen-word-175-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  90K Sep  2 08:41 gen-word-180-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  35M Sep  2 08:41 gen-word-1800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  98K Sep  2 08:41 gen-word-185-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 100K Sep  2 08:41 gen-word-190-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  37M Sep  2 08:41 gen-word-1900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  95K Sep  2 08:41 gen-word-195-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  14K Sep  2 08:41 gen-word-20-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 104K Sep  2 08:41 gen-word-200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  39M Sep  2 08:41 gen-word-2000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 108K Sep  2 08:41 gen-word-205-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 109K Sep  2 08:41 gen-word-210-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  41M Sep  2 08:41 gen-word-2100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 109K Sep  2 08:41 gen-word-215-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 111K Sep  2 08:41 gen-word-220-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  42M Sep  2 08:41 gen-word-2200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 117K Sep  2 08:41 gen-word-225-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 118K Sep  2 08:41 gen-word-230-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  44M Sep  2 08:41 gen-word-2300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 120K Sep  2 08:41 gen-word-235-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 122K Sep  2 08:41 gen-word-240-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  46M Sep  2 08:41 gen-word-2400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 127K Sep  2 08:41 gen-word-245-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  17K Sep  2 08:41 gen-word-25-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 129K Sep  2 08:41 gen-word-250-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  48M Sep  2 08:41 gen-word-2500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 131K Sep  2 08:41 gen-word-255-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 130K Sep  2 08:41 gen-word-260-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  50M Sep  2 08:41 gen-word-2600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 136K Sep  2 08:41 gen-word-265-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 141K Sep  2 08:41 gen-word-270-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  52M Sep  2 08:41 gen-word-2700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 137K Sep  2 08:41 gen-word-275-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 140K Sep  2 08:41 gen-word-280-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  54M Sep  2 08:41 gen-word-2800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 144K Sep  2 08:41 gen-word-285-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 149K Sep  2 08:41 gen-word-290-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  56M Sep  2 08:41 gen-word-2900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 148K Sep  2 08:41 gen-word-295-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  20K Sep  2 08:41 gen-word-30-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 153K Sep  2 08:41 gen-word-300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  58M Sep  2 08:41 gen-word-3000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 156K Sep  2 08:41 gen-word-305-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 158K Sep  2 08:41 gen-word-310-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  60M Sep  2 08:41 gen-word-3100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 161K Sep  2 08:41 gen-word-315-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 160K Sep  2 08:41 gen-word-320-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  61M Sep  2 08:41 gen-word-3200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 162K Sep  2 08:41 gen-word-325-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 168K Sep  2 08:41 gen-word-330-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  63M Sep  2 08:41 gen-word-3300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 167K Sep  2 08:41 gen-word-335-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 169K Sep  2 08:41 gen-word-340-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  65M Sep  2 08:41 gen-word-3400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 171K Sep  2 08:41 gen-word-345-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  22K Sep  2 08:41 gen-word-35-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 178K Sep  2 08:41 gen-word-350-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  67M Sep  2 08:41 gen-word-3500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 176K Sep  2 08:41 gen-word-355-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 180K Sep  2 08:41 gen-word-360-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  69M Sep  2 08:41 gen-word-3600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 181K Sep  2 08:41 gen-word-365-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 184K Sep  2 08:41 gen-word-370-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  71M Sep  2 08:41 gen-word-3700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 184K Sep  2 08:41 gen-word-375-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 188K Sep  2 08:41 gen-word-380-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  73M Sep  2 08:41 gen-word-3800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 195K Sep  2 08:41 gen-word-385-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 195K Sep  2 08:41 gen-word-390-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  75M Sep  2 08:41 gen-word-3900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 197K Sep  2 08:41 gen-word-395-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  24K Sep  2 08:41 gen-word-40-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 201K Sep  2 08:41 gen-word-400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  77M Sep  2 08:41 gen-word-4000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 202K Sep  2 08:41 gen-word-405-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 206K Sep  2 08:41 gen-word-410-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  79M Sep  2 08:41 gen-word-4100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 211K Sep  2 08:41 gen-word-415-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 213K Sep  2 08:41 gen-word-420-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  80M Sep  2 08:41 gen-word-4200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 209K Sep  2 08:41 gen-word-425-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 214K Sep  2 08:41 gen-word-430-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  82M Sep  2 08:41 gen-word-4300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 216K Sep  2 08:41 gen-word-435-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 218K Sep  2 08:41 gen-word-440-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  84M Sep  2 08:41 gen-word-4400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 221K Sep  2 08:41 gen-word-445-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 gen-word-45-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 225K Sep  2 08:41 gen-word-450-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  86M Sep  2 08:41 gen-word-4500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 230K Sep  2 08:41 gen-word-455-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 228K Sep  2 08:41 gen-word-460-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  88M Sep  2 08:41 gen-word-4600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 232K Sep  2 08:41 gen-word-465-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 241K Sep  2 08:41 gen-word-470-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  90M Sep  2 08:41 gen-word-4700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 237K Sep  2 08:41 gen-word-475-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 239K Sep  2 08:41 gen-word-480-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  92M Sep  2 08:41 gen-word-4800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 242K Sep  2 08:41 gen-word-485-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 247K Sep  2 08:41 gen-word-490-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  94M Sep  2 08:41 gen-word-4900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 244K Sep  2 08:41 gen-word-495-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 7.3K Sep  2 08:41 gen-word-5-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 gen-word-50-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 252K Sep  2 08:41 gen-word-500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  96M Sep  2 08:41 gen-word-5000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 249K Sep  2 08:41 gen-word-505-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 250K Sep  2 08:41 gen-word-510-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  98M Sep  2 08:41 gen-word-5100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 253K Sep  2 08:41 gen-word-515-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 258K Sep  2 08:41 gen-word-520-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 100M Sep  2 08:41 gen-word-5200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 262K Sep  2 08:41 gen-word-525-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 265K Sep  2 08:41 gen-word-530-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 101M Sep  2 08:41 gen-word-5300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 263K Sep  2 08:41 gen-word-535-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 267K Sep  2 08:41 gen-word-540-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 103M Sep  2 08:41 gen-word-5400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 268K Sep  2 08:41 gen-word-545-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  33K Sep  2 08:41 gen-word-55-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 274K Sep  2 08:41 gen-word-550-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 105M Sep  2 08:41 gen-word-5500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 273K Sep  2 08:41 gen-word-555-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 279K Sep  2 08:41 gen-word-560-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 107M Sep  2 08:41 gen-word-5600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 282K Sep  2 08:41 gen-word-565-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 281K Sep  2 08:41 gen-word-570-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 109M Sep  2 08:41 gen-word-5700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 283K Sep  2 08:41 gen-word-575-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 286K Sep  2 08:41 gen-word-580-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 111M Sep  2 08:41 gen-word-5800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 288K Sep  2 08:41 gen-word-585-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 291K Sep  2 08:41 gen-word-590-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 113M Sep  2 08:41 gen-word-5900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 296K Sep  2 08:41 gen-word-595-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  34K Sep  2 08:41 gen-word-60-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 300K Sep  2 08:41 gen-word-600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 115M Sep  2 08:41 gen-word-6000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 300K Sep  2 08:41 gen-word-605-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 296K Sep  2 08:41 gen-word-610-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 117M Sep  2 08:41 gen-word-6100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 303K Sep  2 08:41 gen-word-615-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 309K Sep  2 08:41 gen-word-620-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 119M Sep  2 08:41 gen-word-6200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 311K Sep  2 08:41 gen-word-625-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 312K Sep  2 08:41 gen-word-630-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 120M Sep  2 08:41 gen-word-6300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 316K Sep  2 08:41 gen-word-635-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 319K Sep  2 08:41 gen-word-640-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 122M Sep  2 08:41 gen-word-6400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 313K Sep  2 08:41 gen-word-645-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  37K Sep  2 08:41 gen-word-65-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 316K Sep  2 08:41 gen-word-650-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 124M Sep  2 08:41 gen-word-6500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 323K Sep  2 08:41 gen-word-655-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 330K Sep  2 08:41 gen-word-660-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 126M Sep  2 08:41 gen-word-6600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 328K Sep  2 08:41 gen-word-665-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 332K Sep  2 08:41 gen-word-670-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 128M Sep  2 08:41 gen-word-6700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 336K Sep  2 08:41 gen-word-675-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 335K Sep  2 08:41 gen-word-680-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 130M Sep  2 08:41 gen-word-6800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 338K Sep  2 08:41 gen-word-685-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 345K Sep  2 08:41 gen-word-690-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 132M Sep  2 08:41 gen-word-6900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 340K Sep  2 08:41 gen-word-695-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  39K Sep  2 08:41 gen-word-70-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 346K Sep  2 08:41 gen-word-700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 134M Sep  2 08:41 gen-word-7000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 345K Sep  2 08:41 gen-word-705-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 354K Sep  2 08:41 gen-word-710-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 135M Sep  2 08:41 gen-word-7100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 350K Sep  2 08:41 gen-word-715-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 358K Sep  2 08:41 gen-word-720-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 137M Sep  2 08:41 gen-word-7200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 357K Sep  2 08:41 gen-word-725-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 363K Sep  2 08:41 gen-word-730-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 139M Sep  2 08:41 gen-word-7300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 361K Sep  2 08:41 gen-word-735-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 364K Sep  2 08:41 gen-word-740-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 141M Sep  2 08:41 gen-word-7400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 366K Sep  2 08:41 gen-word-745-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  39K Sep  2 08:41 gen-word-75-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 365K Sep  2 08:41 gen-word-750-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 143M Sep  2 08:41 gen-word-7500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 368K Sep  2 08:41 gen-word-755-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 375K Sep  2 08:41 gen-word-760-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 145M Sep  2 08:41 gen-word-7600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 378K Sep  2 08:41 gen-word-765-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 377K Sep  2 08:41 gen-word-770-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 147M Sep  2 08:41 gen-word-7700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 381K Sep  2 08:41 gen-word-775-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 379K Sep  2 08:41 gen-word-780-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 149M Sep  2 08:41 gen-word-7800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 387K Sep  2 08:41 gen-word-785-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 391K Sep  2 08:41 gen-word-790-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 151M Sep  2 08:41 gen-word-7900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 389K Sep  2 08:41 gen-word-795-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  45K Sep  2 08:41 gen-word-80-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 387K Sep  2 08:41 gen-word-800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 153M Sep  2 08:41 gen-word-8000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 395K Sep  2 08:41 gen-word-805-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 403K Sep  2 08:41 gen-word-810-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 399K Sep  2 08:41 gen-word-815-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 404K Sep  2 08:41 gen-word-820-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 407K Sep  2 08:41 gen-word-825-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 406K Sep  2 08:41 gen-word-830-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 414K Sep  2 08:41 gen-word-835-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 421K Sep  2 08:41 gen-word-840-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 415K Sep  2 08:41 gen-word-845-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  47K Sep  2 08:41 gen-word-85-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 416K Sep  2 08:41 gen-word-850-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 420K Sep  2 08:41 gen-word-855-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 428K Sep  2 08:41 gen-word-860-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 427K Sep  2 08:41 gen-word-865-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 425K Sep  2 08:41 gen-word-870-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 430K Sep  2 08:41 gen-word-875-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 434K Sep  2 08:41 gen-word-880-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 435K Sep  2 08:41 gen-word-885-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 441K Sep  2 08:41 gen-word-890-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 442K Sep  2 08:41 gen-word-895-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  46K Sep  2 08:41 gen-word-90-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 440K Sep  2 08:41 gen-word-900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 454K Sep  2 08:41 gen-word-905-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 451K Sep  2 08:41 gen-word-910-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 447K Sep  2 08:41 gen-word-915-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 451K Sep  2 08:41 gen-word-920-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 460K Sep  2 08:41 gen-word-925-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 456K Sep  2 08:41 gen-word-930-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 458K Sep  2 08:41 gen-word-935-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 457K Sep  2 08:41 gen-word-940-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 465K Sep  2 08:41 gen-word-945-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  52K Sep  2 08:41 gen-word-95-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 466K Sep  2 08:41 gen-word-950-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 465K Sep  2 08:41 gen-word-955-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 468K Sep  2 08:41 gen-word-960-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 471K Sep  2 08:41 gen-word-965-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 481K Sep  2 08:41 gen-word-970-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 477K Sep  2 08:41 gen-word-975-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 482K Sep  2 08:41 gen-word-980-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 483K Sep  2 08:41 gen-word-985-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 492K Sep  2 08:41 gen-word-990-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 491K Sep  2 08:41 gen-word-995-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  51K Sep  2 08:41 shuffle-word-10-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  30K Sep  2 08:41 shuffle-word-100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-1000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-105-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-110-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 523K Sep  2 08:41 shuffle-word-1100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  29K Sep  2 08:41 shuffle-word-115-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-120-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 523K Sep  2 08:41 shuffle-word-1200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  29K Sep  2 08:41 shuffle-word-125-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  29K Sep  2 08:41 shuffle-word-130-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 521K Sep  2 08:41 shuffle-word-1300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-135-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-140-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 523K Sep  2 08:41 shuffle-word-1400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-145-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  40K Sep  2 08:41 shuffle-word-15-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-150-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep  2 08:41 shuffle-word-1500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-155-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-160-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 519K Sep  2 08:41 shuffle-word-1600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-165-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-170-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep  2 08:41 shuffle-word-1700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-175-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-180-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 518K Sep  2 08:41 shuffle-word-1800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-185-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-190-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 521K Sep  2 08:41 shuffle-word-1900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-195-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  41K Sep  2 08:41 shuffle-word-20-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep  2 08:41 shuffle-word-2000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-205-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-210-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep  2 08:41 shuffle-word-2100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-215-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-220-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep  2 08:41 shuffle-word-2200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-225-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-230-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 520K Sep  2 08:41 shuffle-word-2300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-235-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-240-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 521K Sep  2 08:41 shuffle-word-2400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-245-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  36K Sep  2 08:41 shuffle-word-25-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-250-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 519K Sep  2 08:41 shuffle-word-2500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-255-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-260-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 515K Sep  2 08:41 shuffle-word-2600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-265-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-270-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 511K Sep  2 08:41 shuffle-word-2700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-275-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-280-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 511K Sep  2 08:41 shuffle-word-2800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-285-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-290-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-2900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-295-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  34K Sep  2 08:41 shuffle-word-30-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-3000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-305-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-310-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-3100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-315-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-320-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-3200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-325-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-330-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-3300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-335-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-340-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-3400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-345-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  33K Sep  2 08:41 shuffle-word-35-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-350-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-3500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-355-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-360-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-3600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-365-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-370-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-3700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-375-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-380-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-3800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-385-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-390-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-3900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-395-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  34K Sep  2 08:41 shuffle-word-40-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-4000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-405-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-410-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-4100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-415-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-420-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-4200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-425-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-430-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-4300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-435-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-440-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-4400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-445-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  32K Sep  2 08:41 shuffle-word-45-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-450-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-4500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-455-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-460-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-4600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-465-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-470-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-4700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-475-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-480-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-4800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-485-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-490-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-4900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-495-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  80K Sep  2 08:41 shuffle-word-5-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  29K Sep  2 08:41 shuffle-word-50-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-5000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-505-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-510-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-5100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-515-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-520-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-5200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-525-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-530-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-5300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-535-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-540-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-5400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-545-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  29K Sep  2 08:41 shuffle-word-55-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-550-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-5500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-555-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-560-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-5600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-565-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-570-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-5700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-575-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-580-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-5800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-585-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-590-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-5900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-595-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  29K Sep  2 08:41 shuffle-word-60-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-6000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-605-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-610-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-6100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-615-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-620-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-6200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-625-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-630-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-6300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-635-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-640-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-6400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-645-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  31K Sep  2 08:41 shuffle-word-65-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-650-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-6500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-655-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-660-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-6600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-665-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-670-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-6700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-675-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-680-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-6800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-685-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-690-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-6900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-695-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  30K Sep  2 08:41 shuffle-word-70-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-7000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-705-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-710-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-7100-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-715-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-720-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-7200-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-725-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-730-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-7300-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-735-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-740-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-7400-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-745-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-75-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-750-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 508K Sep  2 08:41 shuffle-word-7500-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-755-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-760-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-7600-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-765-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-770-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-7700-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-775-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-780-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-7800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-785-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-790-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-7900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-795-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-80-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-800-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 509K Sep  2 08:41 shuffle-word-8000-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-805-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-810-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-815-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-820-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-825-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-830-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-835-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-840-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-845-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  28K Sep  2 08:41 shuffle-word-85-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-850-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-855-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-860-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-865-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-870-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-875-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-880-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-885-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-890-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-895-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  30K Sep  2 08:41 shuffle-word-90-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-900-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-905-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-910-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-915-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-920-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-925-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-930-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-935-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-940-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-945-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  30K Sep  2 08:41 shuffle-word-95-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-950-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-955-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-960-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-965-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-970-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-975-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-980-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-985-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  26K Sep  2 08:41 shuffle-word-990-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root  27K Sep  2 08:41 shuffle-word-995-count.jsonl\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "-rw-r--r-- 1 root root 5.9K Sep  2 08:41 word-2-count.jsonl\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%script bash\n",
-    "\n",
-    "########################################\n",
-    "# Generate the required jsonl dataset\n",
-    "########################################\n",
-    "\n",
-    "# Reset the dataset dir\n",
-    "mkdir -p ../dataset\n",
-    "rm -rf ../dataset/*.jsonl\n",
-    "\n",
-    "# Generate the various datasets\n",
-    "echo \"## Generating word reptition dataset ##\"\n",
-    "\n",
-    "#\n",
-    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
-    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
-    "#\n",
-    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 50 &\n",
-    "for i in {5..1000..5} \n",
-    "do\n",
-    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 50 & \n",
-    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 1 & \n",
-    "done\n",
-    "\n",
-    "#\n",
-    "# Ramping up the 50+ - 4200 words dataset\n",
-    "# \n",
-    "for i in {1100..8000..100} \n",
-    "do\n",
-    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n",
-    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
-    "done\n",
-    "\n",
-    "wait\n",
-    "echo \"## Done ##\"\n",
-    "\n",
-    "ls -lh ../dataset/"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "15e03154",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-02T08:41:54.110558Z",
-     "iopub.status.busy": "2023-09-02T08:41:54.110363Z",
-     "iopub.status.idle": "2023-09-03T06:13:05.260832Z",
-     "shell.execute_reply": "2023-09-03T06:13:05.259129Z"
-    },
-    "papermill": {
-     "duration": 77471.213932,
-     "end_time": "2023-09-03T06:13:05.263168",
-     "exception": false,
-     "start_time": "2023-09-02T08:41:54.049236",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/', '--model.lr_init=1e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-L96-D1024-E0_1-mem-ctx-4k.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/', '--model.lr_init=1e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-L96-D1024-E0_1-mem-ctx-4k.pth'].\r\n",
-      "  rank_zero_warn(\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 3809433325\r\n",
-      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
-      "Global seed set to 3809433325\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230902_084200-42tne4xv\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/42tne4xv\u001b[0m\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/connector.py:562: UserWarning: bf16 is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\r\n",
-      "  rank_zero_warn(\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "GPU available: True (cuda), used: True\r\n",
-      "TPU available: False, using: 0 TPU cores\r\n",
-      "IPU available: False, using: 0 IPUs\r\n",
-      "HPU available: False, using: 0 HPUs\r\n",
-      "\r\n",
-      "\r\n",
-      "[RWKV.Trainer] Applying 'target_batch_size' with the following:\r\n",
-      "   - target_batch_size:       256\r\n",
-      "   - num_nodes:               1\r\n",
-      "   - num_devices:             8\r\n",
-      "   - accumulate_grad_batches: 32\r\n",
-      "   - effective_batch_size:    256\r\n",
-      "\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Resolving data files:   0%|                             | 0/541 [00:00<?, ?it/s]\r",
-      "Resolving data files: 100%|███████████████| 541/541 [00:00<00:00, 240959.80it/s]\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Downloading and preparing dataset json/default to /actions-runner/.cache/huggingface/datasets/json/default-ced8f939a649d19b/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...\r\n",
-      "\r",
-      "Downloading data files:   0%|                             | 0/1 [00:00<?, ?it/s]\r",
-      "Downloading data files: 100%|████████████████████| 1/1 [00:00<00:00, 140.99it/s]\r\n",
-      "\r",
-      "Extracting data files:   0%|                              | 0/1 [00:00<?, ?it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00,  6.44it/s]\r",
-      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00,  6.42it/s]\r\n",
-      "\r",
-      "Generating train split: 0 examples [00:00, ? examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n",
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n",
-      "Setting ds_accelerator to cuda (auto detect)\r\n",
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 4] Global seed set to 3809433325\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 3] Global seed set to 3809433325\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 7] Global seed set to 3809433325\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 2] Global seed set to 3809433325\r\n",
-      "[rank: 1] Global seed set to 3809433325\r\n",
-      "[rank: 6] Global seed set to 3809433325\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 5] Global seed set to 3809433325\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 1198 examples [00:10, 113.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 6862 examples [00:10, 869.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 17785 examples [00:10, 2899.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 28451 examples [00:10, 5594.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 38720 examples [00:10, 9032.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 48274 examples [00:11, 12811.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 56245 examples [00:11, 16426.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 63864 examples [00:11, 20673.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 70478 examples [00:11, 23956.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 78940 examples [00:11, 30678.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 85332 examples [00:11, 31164.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 90808 examples [00:12, 30780.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 95585 examples [00:12, 29599.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 99965 examples [00:12, 27833.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 103577 examples [00:12, 26862.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 106960 examples [00:12, 27666.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 110124 examples [00:12, 27786.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Generating train split: 113217 examples [00:12, 27474.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                                                                    \r"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Dataset json downloaded and prepared to /actions-runner/.cache/huggingface/datasets/json/default-ced8f939a649d19b/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96. Subsequent calls will reuse this data.\r\n",
-      "\r",
-      "  0%|                                                     | 0/1 [00:00<?, ?it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 39.29it/s]\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|                      | 0/154893 [00:00<?, ? examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 53/154893 [00:01<1:21:49, 31.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 71/154893 [00:01<1:01:12, 42.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|             | 96/154893 [00:01<44:24, 58.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|            | 141/154893 [00:02<26:45, 96.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 168/154893 [00:02<22:18, 115.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 189/154893 [00:02<22:02, 116.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 228/154893 [00:02<16:02, 160.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 252/154893 [00:02<16:02, 160.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 278/154893 [00:02<17:39, 145.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 328/154893 [00:03<17:56, 143.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 526/154893 [00:03<06:06, 420.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 597/154893 [00:03<05:48, 442.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 662/154893 [00:03<06:29, 395.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   0%|           | 731/154893 [00:04<07:32, 340.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|           | 815/154893 [00:04<06:28, 397.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|           | 885/154893 [00:04<05:51, 437.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|           | 939/154893 [00:04<06:49, 376.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|          | 1001/154893 [00:04<06:04, 421.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|          | 1051/154893 [00:04<08:38, 296.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|          | 1290/154893 [00:05<04:16, 598.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|          | 1371/154893 [00:05<04:40, 547.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|          | 1444/154893 [00:05<04:52, 523.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|          | 1531/154893 [00:05<06:26, 396.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|          | 1674/154893 [00:06<06:00, 424.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|          | 1792/154893 [00:06<04:49, 529.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|          | 1861/154893 [00:06<07:13, 353.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|▏         | 2002/154893 [00:06<06:40, 381.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|▏         | 2100/154893 [00:07<07:13, 352.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|▏         | 2184/154893 [00:07<08:26, 301.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|▏         | 2278/154893 [00:08<08:50, 287.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   1%|▏         | 2312/154893 [00:08<10:33, 240.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 2505/154893 [00:08<05:56, 427.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 2574/154893 [00:08<05:38, 450.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 2641/154893 [00:08<05:13, 485.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 2707/154893 [00:08<05:42, 444.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 2779/154893 [00:09<05:57, 425.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 2894/154893 [00:09<05:21, 472.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 2975/154893 [00:09<05:41, 445.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 3095/154893 [00:09<04:23, 575.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 3164/154893 [00:09<05:53, 428.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 3289/154893 [00:09<04:36, 548.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 3364/154893 [00:10<04:23, 575.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 3433/154893 [00:10<04:38, 543.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 3496/154893 [00:10<05:09, 488.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 3621/154893 [00:10<03:53, 647.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 3698/154893 [00:10<05:10, 486.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   2%|▏         | 3802/154893 [00:10<04:53, 515.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 3913/154893 [00:11<06:14, 402.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 4164/154893 [00:11<04:22, 573.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 4407/154893 [00:11<03:11, 787.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 4501/154893 [00:11<03:15, 769.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 4624/154893 [00:12<03:25, 732.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 4729/154893 [00:12<03:28, 721.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 4822/154893 [00:12<04:21, 572.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 4949/154893 [00:12<04:24, 567.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 5011/154893 [00:13<05:44, 434.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 5209/154893 [00:13<04:44, 526.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   3%|▎         | 5374/154893 [00:13<03:42, 670.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▎         | 5457/154893 [00:13<05:16, 471.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▎         | 5520/154893 [00:14<06:31, 381.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▎         | 5677/154893 [00:14<05:07, 484.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 5934/154893 [00:14<03:21, 738.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 6138/154893 [00:14<03:16, 755.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 6263/154893 [00:14<03:20, 743.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 6443/154893 [00:15<03:45, 659.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 6518/154893 [00:15<05:04, 486.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 6578/154893 [00:15<05:59, 412.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 6670/154893 [00:16<06:51, 359.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 6744/154893 [00:16<06:04, 406.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 6798/154893 [00:16<06:19, 390.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   4%|▍         | 6969/154893 [00:16<04:07, 596.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▍         | 7184/154893 [00:16<02:48, 876.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▍        | 7406/154893 [00:16<02:08, 1148.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▍         | 7551/154893 [00:17<02:46, 883.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▍         | 7669/154893 [00:17<02:50, 861.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▌         | 7776/154893 [00:17<02:43, 900.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▌         | 7906/154893 [00:17<02:29, 986.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▌         | 8019/154893 [00:17<02:34, 951.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▌         | 8124/154893 [00:17<02:49, 865.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▌         | 8241/154893 [00:17<02:37, 932.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▌         | 8346/154893 [00:17<03:02, 801.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   5%|▌         | 8437/154893 [00:18<03:06, 786.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 8521/154893 [00:18<03:19, 733.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 8600/154893 [00:18<03:24, 715.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 8674/154893 [00:18<03:26, 708.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 8747/154893 [00:18<03:25, 711.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 8825/154893 [00:18<03:20, 729.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 8899/154893 [00:18<03:22, 720.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 8972/154893 [00:18<03:38, 668.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9040/154893 [00:19<03:44, 650.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9106/154893 [00:19<03:55, 619.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9169/154893 [00:19<04:17, 565.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9228/154893 [00:19<04:29, 541.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9287/154893 [00:19<04:22, 553.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9344/154893 [00:19<04:21, 556.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9401/154893 [00:19<04:49, 502.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9462/154893 [00:19<04:50, 500.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9522/154893 [00:19<04:41, 515.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9590/154893 [00:20<04:19, 559.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌         | 9647/154893 [00:20<04:25, 548.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▋         | 9723/154893 [00:20<03:59, 605.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▋         | 9813/154893 [00:20<03:36, 670.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▋         | 9884/154893 [00:20<03:32, 681.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▋         | 9977/154893 [00:20<03:17, 733.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   6%|▌        | 10051/154893 [00:20<03:30, 689.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▌        | 10122/154893 [00:20<03:29, 691.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▌        | 10194/154893 [00:20<03:32, 680.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▌        | 10263/154893 [00:21<03:33, 677.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▌        | 10333/154893 [00:21<03:33, 676.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▌        | 10404/154893 [00:21<03:31, 682.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▌        | 10487/154893 [00:21<03:19, 723.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▌        | 10562/154893 [00:21<03:17, 730.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▌        | 10636/154893 [00:21<03:39, 656.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▌        | 10704/154893 [00:21<03:43, 643.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 10790/154893 [00:21<03:25, 702.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 10863/154893 [00:21<03:23, 708.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 10935/154893 [00:22<03:39, 655.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 11003/154893 [00:22<03:44, 641.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 11069/154893 [00:22<03:58, 603.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 11131/154893 [00:22<03:56, 606.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 11193/154893 [00:22<03:58, 603.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 11258/154893 [00:22<03:57, 605.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 11329/154893 [00:22<03:47, 631.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 11401/154893 [00:22<03:39, 654.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   7%|▋        | 11490/154893 [00:22<03:19, 717.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▌       | 11691/154893 [00:22<02:11, 1087.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 11801/154893 [00:23<02:26, 976.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 11905/154893 [00:23<02:46, 858.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 11995/154893 [00:23<03:01, 788.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12078/154893 [00:23<03:22, 704.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12152/154893 [00:23<03:27, 688.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12223/154893 [00:23<03:28, 685.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12293/154893 [00:23<03:41, 644.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12359/154893 [00:24<03:40, 646.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12426/154893 [00:24<03:48, 624.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12489/154893 [00:24<03:50, 617.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12552/154893 [00:24<03:55, 603.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12638/154893 [00:24<03:30, 674.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12707/154893 [00:24<03:42, 637.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12772/154893 [00:24<03:55, 603.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12834/154893 [00:24<04:02, 585.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▋        | 12894/154893 [00:24<04:01, 588.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▊        | 12954/154893 [00:25<04:00, 591.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▊        | 13068/154893 [00:25<03:10, 746.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   8%|▊        | 13145/154893 [00:25<03:40, 642.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 13230/154893 [00:25<03:23, 695.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 13304/154893 [00:25<03:23, 694.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 13444/154893 [00:25<02:39, 888.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 13560/154893 [00:25<02:27, 956.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 13660/154893 [00:25<02:27, 958.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 13758/154893 [00:25<02:38, 888.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 13851/154893 [00:26<02:59, 785.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 13960/154893 [00:26<02:44, 857.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 14054/154893 [00:26<02:50, 824.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 14142/154893 [00:26<02:50, 826.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 14227/154893 [00:26<03:13, 728.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 14303/154893 [00:26<03:15, 717.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 14377/154893 [00:26<03:18, 707.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 14451/154893 [00:26<03:28, 672.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 14521/154893 [00:27<03:40, 635.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 14597/154893 [00:27<03:32, 659.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):   9%|▊        | 14664/154893 [00:27<03:34, 652.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▊        | 14730/154893 [00:27<03:42, 628.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▊        | 14794/154893 [00:27<04:02, 577.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▊        | 14867/154893 [00:27<03:50, 606.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▊        | 14929/154893 [00:27<03:54, 597.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▊        | 15010/154893 [00:27<03:34, 650.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15076/154893 [00:27<03:38, 639.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15141/154893 [00:28<03:45, 620.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15204/154893 [00:28<03:48, 612.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15275/154893 [00:28<03:38, 638.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15340/154893 [00:28<03:50, 604.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15410/154893 [00:28<03:41, 631.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15474/154893 [00:28<04:04, 569.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15556/154893 [00:28<03:40, 630.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15621/154893 [00:28<03:53, 596.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15696/154893 [00:28<03:41, 628.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15761/154893 [00:29<03:40, 631.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15830/154893 [00:29<03:37, 638.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15909/154893 [00:29<03:26, 671.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 15981/154893 [00:29<03:34, 648.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 16054/154893 [00:29<03:27, 669.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 16123/154893 [00:29<03:44, 619.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 16187/154893 [00:29<03:42, 623.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  10%|▉        | 16251/154893 [00:29<03:54, 591.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16320/154893 [00:29<03:44, 618.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16384/154893 [00:30<03:54, 590.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16444/154893 [00:30<04:11, 550.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16527/154893 [00:30<03:41, 623.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16591/154893 [00:30<03:40, 625.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16656/154893 [00:30<03:44, 616.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16726/154893 [00:30<03:38, 631.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16790/154893 [00:30<03:51, 596.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16852/154893 [00:30<03:53, 591.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16912/154893 [00:30<04:01, 570.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 16971/154893 [00:31<04:00, 573.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 17030/154893 [00:31<04:01, 570.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 17100/154893 [00:31<03:47, 605.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|▉        | 17178/154893 [00:31<03:30, 654.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|█        | 17244/154893 [00:31<03:45, 609.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|█        | 17306/154893 [00:31<03:52, 592.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|█        | 17368/154893 [00:31<03:49, 599.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|█        | 17452/154893 [00:31<03:26, 667.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|█        | 17530/154893 [00:31<03:18, 690.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|█        | 17601/154893 [00:31<03:17, 694.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|█        | 17671/154893 [00:32<03:18, 689.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  11%|█        | 17751/154893 [00:32<03:09, 721.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 17824/154893 [00:32<03:16, 696.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 17900/154893 [00:32<03:12, 712.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 17983/154893 [00:32<03:03, 745.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 18058/154893 [00:32<03:08, 727.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 18157/154893 [00:32<02:50, 801.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 18238/154893 [00:32<03:01, 753.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 18325/154893 [00:32<03:05, 734.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 18427/154893 [00:33<02:48, 812.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 18515/154893 [00:33<02:58, 765.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 18652/154893 [00:33<02:26, 928.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|▉       | 18778/154893 [00:33<02:13, 1017.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|▉       | 18917/154893 [00:33<02:08, 1055.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|▉       | 19058/154893 [00:33<02:00, 1127.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 19173/154893 [00:33<02:16, 992.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  12%|█        | 19281/154893 [00:33<02:21, 961.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█       | 19425/154893 [00:33<02:04, 1084.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█       | 19600/154893 [00:34<01:47, 1262.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█       | 19750/154893 [00:34<01:41, 1327.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█       | 19978/154893 [00:34<01:27, 1537.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█       | 20134/154893 [00:34<01:29, 1504.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█       | 20286/154893 [00:34<02:04, 1078.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█▏       | 20412/154893 [00:34<02:18, 972.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█▏       | 20523/154893 [00:34<02:31, 886.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█▏       | 20621/154893 [00:35<02:43, 821.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█▏       | 20710/154893 [00:35<02:58, 751.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█▏       | 20790/154893 [00:35<03:09, 708.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  13%|█▏       | 20864/154893 [00:35<03:10, 704.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▏       | 20937/154893 [00:35<03:09, 705.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▏       | 21014/154893 [00:35<03:06, 717.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▏       | 21110/154893 [00:35<02:51, 778.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▏       | 21190/154893 [00:35<03:01, 737.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▏       | 21265/154893 [00:36<03:09, 705.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▏       | 21372/154893 [00:36<02:48, 794.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▏       | 21453/154893 [00:36<02:59, 744.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 21529/154893 [00:36<03:04, 721.37 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 21603/154893 [00:36<03:22, 656.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 21671/154893 [00:36<03:21, 662.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 21739/154893 [00:36<03:27, 642.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 21804/154893 [00:36<03:31, 628.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 21881/154893 [00:36<03:20, 664.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 21956/154893 [00:37<03:14, 685.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 22028/154893 [00:37<03:12, 691.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 22099/154893 [00:37<03:25, 644.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 22165/154893 [00:37<03:32, 625.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 22229/154893 [00:37<03:38, 606.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 22305/154893 [00:37<03:31, 628.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 22369/154893 [00:37<03:30, 630.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  14%|█▎       | 22433/154893 [00:37<03:32, 622.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 22496/154893 [00:37<03:35, 615.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 22558/154893 [00:38<03:40, 601.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 22619/154893 [00:38<03:53, 565.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 22696/154893 [00:38<03:34, 616.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 22759/154893 [00:38<03:33, 618.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 22826/154893 [00:38<03:29, 629.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 22909/154893 [00:38<03:13, 681.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 22978/154893 [00:38<03:27, 636.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 23043/154893 [00:38<03:38, 603.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 23131/154893 [00:38<03:14, 678.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 23201/154893 [00:39<03:27, 633.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 23266/154893 [00:39<03:34, 612.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 23329/154893 [00:39<03:38, 601.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 23390/154893 [00:39<03:49, 572.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 23462/154893 [00:39<03:36, 607.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 23538/154893 [00:39<03:22, 648.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▎       | 23604/154893 [00:39<03:37, 602.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▍       | 23666/154893 [00:39<03:43, 585.86 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▍       | 23728/154893 [00:39<03:46, 580.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▍       | 23788/154893 [00:40<03:47, 576.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▍       | 23847/154893 [00:40<03:56, 553.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▍       | 23903/154893 [00:40<04:05, 533.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▍       | 23957/154893 [00:40<04:29, 485.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  15%|█▍       | 24008/154893 [00:40<04:29, 484.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24058/154893 [00:40<04:42, 463.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24111/154893 [00:40<04:32, 479.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24160/154893 [00:40<04:43, 461.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24208/154893 [00:41<04:40, 465.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24255/154893 [00:41<04:44, 459.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24302/154893 [00:41<04:58, 437.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24357/154893 [00:41<04:39, 466.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24410/154893 [00:41<04:29, 483.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24465/154893 [00:41<04:23, 495.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24519/154893 [00:41<04:16, 508.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24586/154893 [00:41<03:55, 552.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24646/154893 [00:41<03:50, 564.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24709/154893 [00:41<03:44, 580.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24775/154893 [00:42<03:42, 583.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24856/154893 [00:42<03:20, 649.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24922/154893 [00:42<03:21, 643.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 24997/154893 [00:42<03:12, 673.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 25066/154893 [00:42<03:14, 667.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 25140/154893 [00:42<03:18, 652.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 25222/154893 [00:42<03:06, 694.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 25293/154893 [00:42<03:08, 687.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 25374/154893 [00:42<02:59, 721.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  16%|█▍       | 25456/154893 [00:42<02:52, 748.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▍       | 25607/154893 [00:43<02:14, 961.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▍       | 25711/154893 [00:43<02:11, 980.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▍       | 25810/154893 [00:43<02:28, 870.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 25900/154893 [00:43<02:40, 804.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 25988/154893 [00:43<02:37, 816.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26074/154893 [00:43<02:48, 765.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26153/154893 [00:43<02:55, 734.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26228/154893 [00:43<03:07, 687.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26298/154893 [00:44<03:06, 690.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26368/154893 [00:44<03:17, 652.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26434/154893 [00:44<03:27, 618.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26497/154893 [00:44<03:31, 607.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26559/154893 [00:44<03:54, 547.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26622/154893 [00:44<03:48, 561.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26686/154893 [00:44<03:41, 577.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26746/154893 [00:44<03:51, 553.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26802/154893 [00:44<03:59, 535.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26857/154893 [00:45<04:01, 530.86 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26911/154893 [00:45<04:05, 521.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 26964/154893 [00:45<05:50, 365.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  17%|█▌       | 27055/154893 [00:45<04:57, 429.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27158/154893 [00:45<05:03, 421.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27247/154893 [00:45<04:10, 509.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27307/154893 [00:46<06:24, 331.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27384/154893 [00:46<05:17, 401.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27439/154893 [00:46<05:27, 389.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27492/154893 [00:46<05:06, 416.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27551/154893 [00:46<04:42, 450.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27610/154893 [00:46<04:28, 473.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27665/154893 [00:47<04:22, 484.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27725/154893 [00:47<04:25, 479.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27797/154893 [00:47<03:57, 535.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27897/154893 [00:47<03:18, 638.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▌       | 27964/154893 [00:47<03:28, 608.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▋       | 28028/154893 [00:47<03:34, 592.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▋       | 28092/154893 [00:47<03:29, 605.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▋       | 28154/154893 [00:47<03:34, 591.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▋       | 28214/154893 [00:47<03:54, 540.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▋       | 28289/154893 [00:48<03:34, 589.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▋       | 28352/154893 [00:48<03:47, 555.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▋       | 28468/154893 [00:48<02:56, 714.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▋       | 28544/154893 [00:48<03:02, 693.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  18%|█▋       | 28618/154893 [00:48<03:03, 687.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 28689/154893 [00:48<03:12, 655.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 28756/154893 [00:48<03:20, 627.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 28820/154893 [00:48<03:40, 572.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 28879/154893 [00:49<03:38, 576.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 28938/154893 [00:49<03:45, 558.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 28997/154893 [00:49<03:51, 544.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29054/154893 [00:49<03:48, 550.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29117/154893 [00:49<03:40, 571.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29175/154893 [00:49<03:53, 538.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29231/154893 [00:49<04:07, 507.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29323/154893 [00:49<03:24, 615.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29387/154893 [00:49<03:40, 569.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29449/154893 [00:50<03:36, 578.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29517/154893 [00:50<03:26, 605.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29590/154893 [00:50<03:16, 637.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29655/154893 [00:50<03:22, 617.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29722/154893 [00:50<03:18, 630.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29798/154893 [00:50<03:08, 664.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29866/154893 [00:50<03:26, 604.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29929/154893 [00:50<03:25, 608.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 29991/154893 [00:50<03:37, 573.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 30050/154893 [00:51<03:58, 522.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▋       | 30105/154893 [00:51<04:05, 508.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  19%|█▊       | 30157/154893 [00:51<04:11, 496.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30208/154893 [00:51<04:18, 482.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30257/154893 [00:51<04:32, 456.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30304/154893 [00:51<04:33, 454.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30359/154893 [00:51<04:23, 471.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30414/154893 [00:51<04:12, 492.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30470/154893 [00:51<04:05, 507.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30522/154893 [00:52<04:32, 456.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30573/154893 [00:52<04:30, 460.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30620/154893 [00:52<04:49, 429.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30670/154893 [00:52<04:37, 446.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30734/154893 [00:52<04:18, 480.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30783/154893 [00:52<04:53, 422.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30827/154893 [00:52<06:33, 315.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30887/154893 [00:53<05:32, 372.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30930/154893 [00:53<05:28, 377.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 30972/154893 [00:53<07:06, 290.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 31066/154893 [00:53<04:52, 423.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 31119/154893 [00:53<04:42, 438.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 31188/154893 [00:53<04:09, 495.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 31281/154893 [00:53<03:24, 603.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 31357/154893 [00:53<03:32, 581.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 31465/154893 [00:54<02:54, 705.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 31549/154893 [00:54<02:46, 740.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 31628/154893 [00:54<03:00, 682.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  20%|█▊       | 31703/154893 [00:54<03:13, 636.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▊       | 31849/154893 [00:54<02:26, 842.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▊       | 31939/154893 [00:54<02:49, 727.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▊       | 32018/154893 [00:54<02:58, 689.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▊       | 32093/154893 [00:55<03:52, 528.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▊       | 32157/154893 [00:55<03:44, 547.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▊       | 32219/154893 [00:55<03:58, 514.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32275/154893 [00:55<03:57, 515.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32331/154893 [00:55<04:00, 510.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32386/154893 [00:55<04:05, 498.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32438/154893 [00:55<04:29, 454.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32485/154893 [00:55<04:33, 447.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32539/154893 [00:55<04:20, 470.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32589/154893 [00:56<04:21, 467.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32637/154893 [00:56<04:33, 447.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32683/154893 [00:56<04:35, 444.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32728/154893 [00:56<04:52, 417.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32771/154893 [00:56<04:56, 412.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32813/154893 [00:56<04:58, 409.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32857/154893 [00:56<04:56, 411.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32913/154893 [00:56<04:38, 438.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 32971/154893 [00:56<04:14, 478.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 33020/154893 [00:57<04:27, 455.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 33090/154893 [00:57<03:59, 509.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 33161/154893 [00:57<03:39, 555.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 33227/154893 [00:57<03:30, 579.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  21%|█▉       | 33286/154893 [00:57<03:32, 571.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 33351/154893 [00:57<03:26, 588.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 33415/154893 [00:57<03:26, 589.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 33484/154893 [00:57<03:16, 617.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 33559/154893 [00:57<03:07, 646.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 33624/154893 [00:58<03:19, 607.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 33724/154893 [00:58<02:48, 717.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 33797/154893 [00:58<02:51, 707.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 33911/154893 [00:58<02:25, 828.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 34042/154893 [00:58<02:18, 873.21 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 34130/154893 [00:58<02:23, 838.86 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 34214/154893 [00:58<02:26, 823.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 34298/154893 [00:58<02:36, 770.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|█▉       | 34376/154893 [00:58<02:54, 690.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|██       | 34447/154893 [00:59<02:56, 682.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|██       | 34519/154893 [00:59<03:01, 663.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|██       | 34586/154893 [00:59<03:02, 659.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|██       | 34653/154893 [00:59<03:14, 617.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|██       | 34717/154893 [00:59<03:27, 579.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|██       | 34776/154893 [00:59<03:42, 539.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  22%|██       | 34831/154893 [00:59<03:57, 504.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 34908/154893 [00:59<03:31, 568.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 34981/154893 [01:00<03:17, 608.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35053/154893 [01:00<04:13, 472.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35146/154893 [01:00<03:32, 563.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35209/154893 [01:00<06:34, 303.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35297/154893 [01:00<05:10, 385.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35355/154893 [01:01<05:02, 395.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35409/154893 [01:01<04:43, 421.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35469/154893 [01:01<04:19, 459.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35525/154893 [01:01<04:07, 482.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35581/154893 [01:01<04:06, 483.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35635/154893 [01:01<04:04, 487.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35688/154893 [01:01<04:01, 493.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35756/154893 [01:01<03:39, 541.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35831/154893 [01:01<03:23, 585.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35892/154893 [01:02<03:34, 554.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 35972/154893 [01:02<03:45, 526.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 36087/154893 [01:02<02:57, 667.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 36158/154893 [01:02<03:11, 618.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 36223/154893 [01:02<03:20, 591.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 36284/154893 [01:02<03:36, 548.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 36341/154893 [01:02<03:50, 513.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  23%|██       | 36397/154893 [01:02<03:45, 524.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██       | 36453/154893 [01:03<03:45, 524.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██       | 36508/154893 [01:03<03:54, 505.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██       | 36560/154893 [01:03<03:56, 499.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 36632/154893 [01:03<03:32, 556.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 36697/154893 [01:03<03:23, 580.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 36767/154893 [01:03<03:13, 611.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 36829/154893 [01:03<03:31, 559.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 36887/154893 [01:03<03:38, 540.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 36942/154893 [01:03<03:49, 513.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 36994/154893 [01:04<03:51, 510.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37046/154893 [01:04<03:58, 494.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37096/154893 [01:04<04:03, 482.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37150/154893 [01:04<03:56, 498.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37205/154893 [01:04<03:52, 507.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37256/154893 [01:04<03:54, 502.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37313/154893 [01:04<03:49, 511.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37367/154893 [01:04<03:49, 512.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37420/154893 [01:04<03:58, 493.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37470/154893 [01:05<04:02, 484.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37519/154893 [01:05<04:30, 434.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37581/154893 [01:05<04:04, 478.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37641/154893 [01:05<03:49, 510.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37694/154893 [01:05<03:49, 509.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37769/154893 [01:05<03:31, 554.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37839/154893 [01:05<03:17, 594.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  24%|██▏      | 37899/154893 [01:05<03:25, 569.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▏      | 37957/154893 [01:05<03:33, 548.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▏      | 38019/154893 [01:06<03:26, 564.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▏      | 38090/154893 [01:06<03:13, 602.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▏      | 38162/154893 [01:06<03:13, 602.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▏      | 38255/154893 [01:06<02:48, 692.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▏      | 38357/154893 [01:06<02:30, 774.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▏      | 38436/154893 [01:06<02:41, 720.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▏      | 38529/154893 [01:06<02:31, 769.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▏      | 38610/154893 [01:06<02:33, 755.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▎      | 38754/154893 [01:06<02:03, 940.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██      | 38894/154893 [01:07<01:48, 1064.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██      | 39004/154893 [01:07<01:51, 1043.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▎      | 39110/154893 [01:07<02:02, 945.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▎      | 39207/154893 [01:07<02:16, 848.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▎      | 39296/154893 [01:07<02:24, 797.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▎      | 39378/154893 [01:07<02:30, 768.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  25%|██▎      | 39458/154893 [01:07<02:31, 763.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 39538/154893 [01:07<02:37, 732.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 39614/154893 [01:07<02:36, 737.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 39689/154893 [01:08<02:52, 667.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 39759/154893 [01:08<02:52, 668.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 39828/154893 [01:08<03:01, 633.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 39893/154893 [01:08<03:20, 573.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 39954/154893 [01:08<03:17, 582.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40014/154893 [01:08<03:28, 551.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40070/154893 [01:08<03:32, 540.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40125/154893 [01:08<03:34, 535.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40179/154893 [01:09<03:35, 532.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40233/154893 [01:09<03:36, 529.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40290/154893 [01:09<03:32, 539.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40366/154893 [01:09<03:18, 577.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40435/154893 [01:09<03:10, 602.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40496/154893 [01:09<03:28, 548.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40552/154893 [01:09<03:46, 505.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40617/154893 [01:09<03:31, 539.86 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40675/154893 [01:10<04:11, 453.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40748/154893 [01:10<03:39, 520.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▎      | 40809/154893 [01:10<04:11, 452.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▍      | 40882/154893 [01:10<03:41, 515.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▍      | 40945/154893 [01:10<03:30, 540.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  26%|██▍      | 41003/154893 [01:10<04:24, 430.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41067/154893 [01:10<04:13, 448.21 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41122/154893 [01:10<04:06, 461.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41187/154893 [01:11<03:45, 505.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41257/154893 [01:11<03:29, 541.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41320/154893 [01:11<03:21, 564.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41391/154893 [01:11<03:16, 576.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41451/154893 [01:11<03:25, 551.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41510/154893 [01:11<03:35, 525.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41565/154893 [01:11<03:41, 511.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41627/154893 [01:11<03:30, 539.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41708/154893 [01:11<03:04, 612.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41771/154893 [01:12<03:13, 585.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41845/154893 [01:12<03:00, 627.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41912/154893 [01:12<02:58, 633.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 41977/154893 [01:12<03:31, 532.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42034/154893 [01:12<03:36, 520.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42096/154893 [01:12<03:55, 479.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42162/154893 [01:12<03:37, 518.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42218/154893 [01:12<04:08, 453.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42268/154893 [01:13<04:17, 437.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42314/154893 [01:13<04:24, 424.86 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42359/154893 [01:13<04:30, 415.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42402/154893 [01:13<04:30, 415.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42448/154893 [01:13<04:23, 426.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42493/154893 [01:13<04:27, 419.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42545/154893 [01:13<04:12, 445.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  27%|██▍      | 42591/154893 [01:13<04:17, 436.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▍      | 42635/154893 [01:13<04:20, 431.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▍      | 42688/154893 [01:14<04:09, 449.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▍      | 42766/154893 [01:14<03:26, 542.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▍      | 42843/154893 [01:14<03:06, 601.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▍      | 42904/154893 [01:14<03:14, 574.86 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▍      | 42978/154893 [01:14<03:03, 609.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43047/154893 [01:14<02:58, 627.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43111/154893 [01:14<03:02, 612.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43174/154893 [01:14<03:07, 594.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43245/154893 [01:14<02:58, 625.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43308/154893 [01:15<02:59, 621.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43398/154893 [01:15<02:43, 681.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43498/154893 [01:15<02:25, 765.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43592/154893 [01:15<02:17, 810.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43674/154893 [01:15<02:18, 801.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43755/154893 [01:15<02:23, 776.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43834/154893 [01:15<02:27, 755.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43914/154893 [01:15<02:29, 744.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 43989/154893 [01:15<02:44, 673.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 44059/154893 [01:16<02:48, 659.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  28%|██▌      | 44126/154893 [01:16<02:59, 615.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44189/154893 [01:16<03:07, 588.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44249/154893 [01:16<03:19, 554.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44307/154893 [01:16<03:17, 559.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44398/154893 [01:16<02:50, 647.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44470/154893 [01:16<02:45, 666.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44550/154893 [01:16<02:38, 696.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44621/154893 [01:16<02:39, 689.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44692/154893 [01:17<02:43, 672.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44773/154893 [01:17<02:34, 711.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44853/154893 [01:17<02:29, 736.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 44928/154893 [01:17<02:31, 724.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 45001/154893 [01:17<02:38, 693.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 45072/154893 [01:17<02:44, 669.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▌      | 45140/154893 [01:17<02:49, 647.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▋      | 45215/154893 [01:17<02:42, 675.21 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▋      | 45289/154893 [01:17<02:38, 691.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▋      | 45365/154893 [01:17<02:34, 707.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▋      | 45438/154893 [01:18<02:41, 677.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▋      | 45513/154893 [01:18<02:38, 689.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▋      | 45583/154893 [01:18<02:38, 688.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  29%|██▋      | 45653/154893 [01:18<02:39, 685.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 45722/154893 [01:18<03:01, 601.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 45786/154893 [01:18<03:00, 605.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 45850/154893 [01:18<03:03, 595.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 45911/154893 [01:18<03:07, 580.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 45971/154893 [01:19<03:11, 570.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46030/154893 [01:19<03:18, 547.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46087/154893 [01:19<03:18, 547.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46143/154893 [01:19<03:20, 542.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46198/154893 [01:19<03:24, 531.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46256/154893 [01:19<03:19, 543.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46311/154893 [01:19<03:19, 544.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46366/154893 [01:19<03:26, 526.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46419/154893 [01:19<03:29, 517.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46477/154893 [01:19<03:23, 533.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46539/154893 [01:20<03:15, 553.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46596/154893 [01:20<03:24, 530.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46652/154893 [01:20<03:54, 461.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46700/154893 [01:20<04:14, 425.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46747/154893 [01:20<04:22, 412.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46795/154893 [01:20<04:12, 428.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46863/154893 [01:20<03:39, 491.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 46933/154893 [01:20<03:17, 545.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 47032/154893 [01:21<02:43, 661.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 47101/154893 [01:21<02:49, 636.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 47166/154893 [01:21<03:04, 582.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  30%|██▋      | 47227/154893 [01:21<03:15, 549.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▋      | 47285/154893 [01:21<03:24, 526.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47341/154893 [01:21<03:25, 523.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47394/154893 [01:21<03:26, 521.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47456/154893 [01:21<03:16, 547.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47515/154893 [01:21<03:15, 549.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47571/154893 [01:22<03:26, 519.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47624/154893 [01:22<03:40, 486.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47674/154893 [01:22<03:40, 486.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47733/154893 [01:22<03:28, 514.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47806/154893 [01:22<03:08, 569.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47867/154893 [01:22<03:05, 577.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47927/154893 [01:22<03:04, 580.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 47993/154893 [01:22<02:57, 602.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48054/154893 [01:23<05:32, 321.21 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48143/154893 [01:23<04:11, 423.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48203/154893 [01:23<04:11, 424.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48258/154893 [01:23<04:22, 405.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48307/154893 [01:23<04:18, 412.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48355/154893 [01:23<04:42, 377.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48398/154893 [01:23<04:34, 387.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48441/154893 [01:24<08:21, 212.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48544/154893 [01:24<05:16, 335.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48596/154893 [01:24<04:58, 356.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48646/154893 [01:24<04:52, 363.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48692/154893 [01:24<04:54, 360.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48735/154893 [01:25<04:50, 365.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  31%|██▊      | 48777/154893 [01:25<04:41, 377.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 48819/154893 [01:25<04:35, 384.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 48867/154893 [01:25<04:19, 408.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 48910/154893 [01:25<04:30, 391.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 48954/154893 [01:25<04:22, 404.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 49001/154893 [01:25<04:14, 416.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 49061/154893 [01:25<03:46, 467.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 49141/154893 [01:25<03:27, 510.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 49243/154893 [01:25<02:43, 645.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 49310/154893 [01:26<02:43, 643.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 49377/154893 [01:26<02:56, 597.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▊      | 49438/154893 [01:26<03:10, 553.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 49495/154893 [01:26<03:23, 518.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 49548/154893 [01:26<03:29, 504.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 49600/154893 [01:26<03:27, 507.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 49653/154893 [01:26<03:33, 491.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 49713/154893 [01:26<03:24, 513.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 49776/154893 [01:27<03:19, 527.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 49834/154893 [01:27<03:14, 541.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 49889/154893 [01:27<04:06, 425.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 49961/154893 [01:27<03:32, 494.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 50015/154893 [01:27<04:04, 428.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 50063/154893 [01:27<04:21, 401.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 50107/154893 [01:27<04:58, 351.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 50150/154893 [01:28<04:49, 362.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 50189/154893 [01:28<05:01, 347.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 50232/154893 [01:28<04:59, 348.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██▉      | 50284/154893 [01:28<04:28, 389.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  32%|██���      | 50325/154893 [01:28<04:37, 377.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50372/154893 [01:28<04:22, 398.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50416/154893 [01:28<04:14, 409.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50467/154893 [01:28<04:07, 421.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50510/154893 [01:28<04:16, 407.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50561/154893 [01:29<03:59, 435.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50606/154893 [01:29<04:10, 417.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50649/154893 [01:29<04:08, 419.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50692/154893 [01:29<04:07, 420.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50735/154893 [01:29<04:07, 420.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50778/154893 [01:29<04:20, 399.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50820/154893 [01:29<04:18, 403.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50861/154893 [01:29<04:25, 391.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50901/154893 [01:29<04:43, 366.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50949/154893 [01:30<04:25, 391.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 50989/154893 [01:30<04:27, 387.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51029/154893 [01:30<04:46, 362.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51068/154893 [01:30<04:46, 362.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51105/154893 [01:30<08:00, 216.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51225/154893 [01:30<04:18, 400.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51285/154893 [01:30<03:54, 442.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51343/154893 [01:31<03:47, 456.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51405/154893 [01:31<03:29, 494.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51462/154893 [01:31<03:30, 492.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51517/154893 [01:31<03:28, 496.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51571/154893 [01:31<03:28, 495.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|██▉      | 51624/154893 [01:31<03:26, 501.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|███      | 51683/154893 [01:31<03:16, 525.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|███      | 51745/154893 [01:31<03:07, 548.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|███      | 51806/154893 [01:31<03:02, 565.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  33%|███      | 51882/154893 [01:31<02:45, 621.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|███      | 51990/154893 [01:32<02:16, 754.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|███      | 52103/154893 [01:32<01:59, 859.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|███      | 52203/154893 [01:32<01:54, 899.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|███      | 52300/154893 [01:32<01:51, 919.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|███      | 52408/154893 [01:32<01:46, 966.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|███      | 52519/154893 [01:32<01:43, 992.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|███      | 52621/154893 [01:32<01:45, 973.53 examples/s][rank: 7] Global seed set to 3809433325\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\r\n",
-      "[2023-09-02 08:44:42,013] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n",
-      "\r",
-      "Map (num_proc=64):  34%|███      | 52722/154893 [01:32<01:43, 983.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|███      | 52821/154893 [01:32<01:43, 982.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|███      | 52929/154893 [01:33<01:44, 973.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|██▋     | 53040/154893 [01:33<01:40, 1012.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|██▋     | 53148/154893 [01:33<01:39, 1023.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|██▊     | 53278/154893 [01:33<01:32, 1103.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  34%|██▊     | 53390/154893 [01:33<01:33, 1091.37 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 1] Global seed set to 3809433325\r\n",
-      "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\r\n",
-      "[2023-09-02 08:44:42,797] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|██▊     | 53500/154893 [01:33<01:37, 1037.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|██▊     | 53606/154893 [01:33<01:40, 1003.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 6] Global seed set to 3809433325\r\n",
-      "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\r\n",
-      "[2023-09-02 08:44:43,030] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n",
-      "\r",
-      "Map (num_proc=64):  35%|███      | 53707/154893 [01:33<01:43, 976.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 53806/154893 [01:33<01:44, 963.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 53903/154893 [01:33<01:48, 934.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 54004/154893 [01:34<01:46, 949.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 54112/154893 [01:34<01:42, 981.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 54212/154893 [01:34<01:49, 923.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 54314/154893 [01:34<01:46, 947.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 54423/154893 [01:34<01:42, 981.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 54523/154893 [01:34<01:43, 968.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 54621/154893 [01:34<01:48, 924.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|██▊     | 54766/154893 [01:34<01:33, 1071.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|██▊     | 54877/154893 [01:34<01:38, 1018.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  35%|███▏     | 54981/154893 [01:35<01:40, 998.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|██▊     | 55087/154893 [01:35<01:38, 1013.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|███▏     | 55190/154893 [01:35<01:48, 921.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|██▊     | 55335/154893 [01:35<01:34, 1054.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|██▊     | 55443/154893 [01:35<01:37, 1015.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|██▊     | 55547/154893 [01:35<01:38, 1010.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|███▏     | 55651/154893 [01:35<01:44, 950.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|███▏     | 55748/154893 [01:35<02:13, 742.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|███▏     | 55917/154893 [01:36<01:43, 959.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|███▎     | 56026/154893 [01:36<01:48, 915.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|███▎     | 56138/154893 [01:36<01:42, 964.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|███▎     | 56255/154893 [01:36<01:46, 928.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  36%|██▉     | 56428/154893 [01:36<01:27, 1127.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 56550/154893 [01:36<01:32, 1063.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 56662/154893 [01:36<01:35, 1031.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 56769/154893 [01:36<01:36, 1018.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 56874/154893 [01:36<01:35, 1026.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|███▎     | 56979/154893 [01:37<01:59, 822.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|███▎     | 57069/154893 [01:37<02:02, 801.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 57269/154893 [01:37<01:29, 1093.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 57389/154893 [01:37<01:31, 1061.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 4] Global seed set to 3809433325\r\n",
-      "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\r\n",
-      "[2023-09-02 08:44:46,953] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n",
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 57502/154893 [01:37<01:36, 1013.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 57660/154893 [01:37<01:23, 1159.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 57804/154893 [01:37<01:18, 1232.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|██▉     | 57933/154893 [01:37<01:24, 1146.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  37%|███▎     | 58052/154893 [01:38<01:40, 967.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███▍     | 58157/154893 [01:38<01:38, 985.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███▍     | 58261/154893 [01:38<01:43, 930.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███▍     | 58371/154893 [01:38<01:39, 972.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███▍     | 58474/154893 [01:38<01:59, 809.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███     | 58664/154893 [01:38<01:30, 1064.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███▍     | 58783/154893 [01:38<01:37, 984.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███▍     | 58890/154893 [01:39<01:43, 930.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███▍     | 58989/154893 [01:39<01:43, 925.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███▍     | 59098/154893 [01:39<01:39, 964.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███▍     | 59199/154893 [01:39<01:40, 948.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 5] Global seed set to 3809433325\r\n",
-      "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\r\n",
-      "[2023-09-02 08:44:48,743] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███     | 59337/154893 [01:39<01:29, 1063.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███     | 59462/154893 [01:39<01:29, 1065.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  38%|███     | 59580/154893 [01:39<01:29, 1064.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  39%|███     | 59739/154893 [01:39<01:18, 1204.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  39%|███     | 59927/154893 [01:39<01:08, 1391.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  39%|███     | 60127/154893 [01:40<01:01, 1552.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 3] Global seed set to 3809433325\r\n",
-      "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\r\n",
-      "[2023-09-02 08:44:49,336] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  39%|███     | 60365/154893 [01:40<00:52, 1788.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  39%|███▏    | 60562/154893 [01:40<00:51, 1817.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  39%|███▏    | 60770/154893 [01:40<00:49, 1891.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  39%|███▏    | 60961/154893 [01:40<00:50, 1874.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  39%|███▏    | 61155/154893 [01:40<00:49, 1893.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  40%|███▏    | 61400/154893 [01:40<00:45, 2053.42 examples/s][rank: 2] Global seed set to 3809433325\r\n",
-      "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\r\n",
-      "[2023-09-02 08:44:49,949] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  40%|███▏    | 61646/154893 [01:40<00:43, 2147.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  40%|███▏    | 61865/154893 [01:40<00:43, 2128.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  40%|███▏    | 62079/154893 [01:40<00:45, 2031.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  40%|███▏    | 62301/154893 [01:41<00:44, 2079.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  40%|███▏    | 62511/154893 [01:41<00:46, 1987.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  40%|███▏    | 62712/154893 [01:41<00:48, 1899.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  41%|███▎    | 62932/154893 [01:41<00:46, 1975.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  41%|███▎    | 63134/154893 [01:41<00:49, 1862.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  41%|███▎    | 63377/154893 [01:41<00:45, 2017.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  41%|███▎    | 63623/154893 [01:41<00:42, 2141.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  41%|███▎    | 63840/154893 [01:41<00:43, 2098.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  41%|███▎    | 64052/154893 [01:41<00:43, 2066.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  41%|███▎    | 64261/154893 [01:42<00:45, 2008.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  42%|███▎    | 64465/154893 [01:42<00:46, 1941.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  42%|███▎    | 64664/154893 [01:42<00:47, 1915.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  42%|███▎    | 64860/154893 [01:42<00:47, 1887.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  42%|███▎    | 65050/154893 [01:42<00:50, 1788.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  42%|███▎    | 65233/154893 [01:42<00:50, 1775.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  42%|███▍    | 65412/154893 [01:42<00:51, 1734.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  42%|███▍    | 65586/154893 [01:42<00:51, 1728.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  42%|███▍    | 65768/154893 [01:42<00:50, 1754.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  43%|███▍    | 65947/154893 [01:43<00:51, 1724.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  43%|███▍    | 66121/154893 [01:43<00:52, 1705.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  43%|███▍    | 66310/154893 [01:43<00:50, 1755.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  43%|███▍    | 66505/154893 [01:43<00:48, 1810.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  43%|███▍    | 66699/154893 [01:43<00:47, 1847.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  43%|███▍    | 66901/154893 [01:43<00:46, 1895.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  43%|███▍    | 67112/154893 [01:43<00:45, 1947.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  43%|███▍    | 67315/154893 [01:43<00:44, 1955.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  44%|███▍    | 67515/154893 [01:43<00:44, 1965.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  44%|███▍    | 67714/154893 [01:43<00:49, 1757.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  44%|███▌    | 68022/154893 [01:44<00:40, 2119.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  44%|███▌    | 68241/154893 [01:44<00:41, 2084.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  44%|███▌    | 68469/154893 [01:44<00:40, 2138.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  44%|███▌    | 68696/154893 [01:44<00:39, 2173.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  44%|███▌    | 68916/154893 [01:44<00:41, 2071.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  45%|███▌    | 69134/154893 [01:44<00:40, 2092.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  45%|███▌    | 69346/154893 [01:44<00:42, 2035.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  45%|███▌    | 69555/154893 [01:44<00:43, 1978.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  45%|███▌    | 69756/154893 [01:44<00:43, 1977.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  45%|███▌    | 70005/154893 [01:45<00:40, 2112.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  45%|███▋    | 70218/154893 [01:45<00:41, 2048.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  45%|███▋    | 70427/154893 [01:45<00:42, 2005.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  46%|███▋    | 70630/154893 [01:45<00:42, 1991.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  46%|███▋    | 70833/154893 [01:45<00:42, 1982.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  46%|███▋    | 71036/154893 [01:45<00:42, 1992.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  46%|███▋    | 71237/154893 [01:45<00:44, 1884.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  46%|███▋    | 71429/154893 [01:45<00:44, 1855.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  46%|███▋    | 71627/154893 [01:45<00:44, 1889.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  46%|███▋    | 71818/154893 [01:45<00:44, 1856.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  46%|███▋    | 72017/154893 [01:46<00:43, 1890.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  47%|███▋    | 72210/154893 [01:46<00:44, 1840.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  47%|███▋    | 72402/154893 [01:46<00:44, 1860.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  47%|███▋    | 72604/154893 [01:46<00:43, 1906.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  47%|███▊    | 72796/154893 [01:46<00:43, 1898.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  47%|███▊    | 72987/154893 [01:46<00:45, 1797.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  47%|███▊    | 73176/154893 [01:46<00:44, 1823.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  47%|███▊    | 73366/154893 [01:46<00:44, 1817.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  47%|███▊    | 73552/154893 [01:46<00:44, 1824.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  48%|███▊    | 73737/154893 [01:47<00:44, 1826.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  48%|███▊    | 73921/154893 [01:47<00:45, 1788.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  48%|███▊    | 74101/154893 [01:47<00:45, 1787.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  48%|███▊    | 74283/154893 [01:47<00:45, 1769.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  48%|███▊    | 74462/154893 [01:47<00:45, 1773.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  48%|███▊    | 74643/154893 [01:47<00:45, 1763.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  48%|███▊    | 74821/154893 [01:47<00:46, 1737.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  48%|███▊    | 74996/154893 [01:47<00:46, 1722.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  49%|███▉    | 75170/154893 [01:47<00:46, 1705.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  49%|███▉    | 75396/154893 [01:47<00:42, 1863.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  49%|███▉    | 75628/154893 [01:48<00:39, 1992.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  49%|███▉    | 75857/154893 [01:48<00:38, 2079.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  49%|███▉    | 76075/154893 [01:48<00:37, 2103.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  49%|███▉    | 76288/154893 [01:48<00:38, 2044.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  49%|███▉    | 76498/154893 [01:48<00:38, 2059.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  50%|███▉    | 76707/154893 [01:48<00:39, 1969.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  50%|███▉    | 76912/154893 [01:48<00:39, 1957.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  50%|███▉    | 77110/154893 [01:48<00:42, 1826.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  50%|███▉    | 77301/154893 [01:48<00:42, 1846.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  50%|████    | 77488/154893 [01:49<00:43, 1798.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  50%|████    | 77744/154893 [01:49<00:38, 2002.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  50%|████    | 77974/154893 [01:49<00:37, 2066.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  50%|████    | 78183/154893 [01:49<00:41, 1866.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  51%|████    | 78374/154893 [01:49<00:43, 1764.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  51%|████    | 78554/154893 [01:49<00:43, 1742.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  51%|████    | 78731/154893 [01:49<00:43, 1741.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  51%|████    | 78907/154893 [01:49<00:45, 1673.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  51%|████    | 79111/154893 [01:49<00:43, 1738.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  51%|████    | 79302/154893 [01:50<00:42, 1764.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  51%|████    | 79515/154893 [01:50<00:45, 1662.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  51%|████    | 79754/154893 [01:50<00:40, 1855.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  52%|████▏   | 79944/154893 [01:50<00:40, 1863.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  52%|████▏   | 80134/154893 [01:50<00:39, 1872.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  52%|████▏   | 80325/154893 [01:50<00:40, 1830.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  52%|████▏   | 80511/154893 [01:50<00:40, 1823.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  52%|████▏   | 80696/154893 [01:50<00:40, 1809.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  52%|████▏   | 80879/154893 [01:50<00:41, 1784.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  52%|████▏   | 81079/154893 [01:51<00:40, 1844.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  52%|████▏   | 81284/154893 [01:51<00:38, 1899.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  53%|████▏   | 81546/154893 [01:51<00:34, 2110.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  53%|████▏   | 81843/154893 [01:51<00:30, 2364.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  53%|████▏   | 82122/154893 [01:51<00:29, 2480.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  53%|████▎   | 82376/154893 [01:51<00:29, 2442.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  53%|████▎   | 82621/154893 [01:51<00:32, 2251.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  53%|████▎   | 82851/154893 [01:51<00:35, 2057.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  54%|████▎   | 83062/154893 [01:51<00:36, 1944.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  54%|████▎   | 83262/154893 [01:52<00:38, 1850.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  54%|████▎   | 83452/154893 [01:52<00:39, 1797.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  54%|████▎   | 83634/154893 [01:52<00:39, 1793.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  54%|███���▎   | 83816/154893 [01:52<00:40, 1768.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  54%|████▎   | 83994/154893 [01:52<00:43, 1614.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  54%|████▎   | 84242/154893 [01:52<00:39, 1800.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  55%|████▎   | 84455/154893 [01:52<00:37, 1874.86 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  55%|████▎   | 84646/154893 [01:52<00:37, 1867.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  55%|████▍   | 84836/154893 [01:52<00:39, 1764.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  55%|████▍   | 85016/154893 [01:53<00:40, 1730.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  55%|████▍   | 85191/154893 [01:53<00:41, 1665.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  55%|████▍   | 85360/154893 [01:53<00:42, 1628.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  55%|████▍   | 85526/154893 [01:53<00:43, 1579.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  55%|████▍   | 85685/154893 [01:53<00:43, 1576.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  55%|████▍   | 85851/154893 [01:53<00:43, 1598.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▍   | 86012/154893 [01:53<00:44, 1564.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▍   | 86171/154893 [01:53<00:45, 1508.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▍   | 86328/154893 [01:53<00:45, 1513.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▍   | 86481/154893 [01:54<00:46, 1457.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▍   | 86633/154893 [01:54<00:46, 1471.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▍   | 86794/154893 [01:54<00:45, 1504.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▍   | 86954/154893 [01:54<00:44, 1526.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▍   | 87118/154893 [01:54<00:43, 1557.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▌   | 87277/154893 [01:54<00:44, 1524.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  56%|████▌   | 87458/154893 [01:54<00:42, 1588.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 87620/154893 [01:54<00:43, 1548.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 87776/154893 [01:54<00:44, 1524.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 87932/154893 [01:54<00:43, 1528.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 88088/154893 [01:55<00:43, 1525.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 88245/154893 [01:55<00:43, 1532.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 88401/154893 [01:55<00:43, 1537.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 88555/154893 [01:55<00:43, 1519.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 88713/154893 [01:55<00:43, 1525.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 88869/154893 [01:55<00:43, 1528.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  57%|████▌   | 89024/154893 [01:55<00:44, 1491.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▌   | 89182/154893 [01:55<00:43, 1510.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▌   | 89335/154893 [01:55<00:43, 1497.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▌   | 89485/154893 [01:55<00:44, 1476.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▋   | 89633/154893 [01:56<00:44, 1468.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▋   | 89790/154893 [01:56<00:43, 1490.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▋   | 89946/154893 [01:56<00:43, 1509.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▋   | 90107/154893 [01:56<00:42, 1536.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▋   | 90261/154893 [01:56<00:43, 1500.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▋   | 90413/154893 [01:56<00:44, 1457.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  58%|████▋   | 90560/154893 [01:56<00:44, 1439.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▋   | 90715/154893 [01:56<00:43, 1469.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▋   | 90866/154893 [01:56<00:43, 1467.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▋   | 91018/154893 [01:57<00:43, 1469.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▋   | 91167/154893 [01:57<00:43, 1463.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▋   | 91322/154893 [01:57<00:42, 1485.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▋   | 91472/154893 [01:57<00:42, 1487.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▋   | 91628/154893 [01:57<00:41, 1507.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▋   | 91787/154893 [01:57<00:41, 1527.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▋   | 91943/154893 [01:57<00:41, 1520.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  59%|████▊   | 92160/154893 [01:57<00:36, 1711.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  60%|████▊   | 92333/154893 [01:57<00:36, 1715.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  60%|████▊   | 92506/154893 [01:57<00:37, 1656.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  60%|████▊   | 92684/154893 [01:58<00:36, 1683.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  60%|████▊   | 92892/154893 [01:58<00:35, 1758.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  60%|████▊   | 93070/154893 [01:58<00:37, 1666.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  60%|████▊   | 93238/154893 [01:58<00:38, 1592.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  60%|████▊   | 93401/154893 [01:58<00:43, 1413.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  60%|████▊   | 93585/154893 [01:58<00:40, 1523.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▊   | 93753/154893 [01:58<00:39, 1560.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▊   | 93917/154893 [01:58<00:38, 1576.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▊   | 94081/154893 [01:58<00:38, 1578.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▊   | 94243/154893 [01:59<00:38, 1570.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▉   | 94404/154893 [01:59<00:39, 1527.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▉   | 94559/154893 [01:59<00:40, 1483.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▉   | 94709/154893 [01:59<00:41, 1458.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▉   | 94857/154893 [01:59<00:41, 1437.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▉   | 95002/154893 [01:59<00:41, 1436.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  61%|████▉   | 95146/154893 [01:59<00:41, 1433.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 95291/154893 [01:59<00:42, 1413.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 95441/154893 [01:59<00:41, 1434.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 95586/154893 [01:59<00:41, 1422.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 95730/154893 [02:00<00:42, 1382.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 95882/154893 [02:00<00:41, 1420.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 96044/154893 [02:00<00:40, 1460.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 96210/154893 [02:00<00:38, 1511.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 96398/154893 [02:00<00:36, 1595.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 96558/154893 [02:00<00:37, 1536.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  62%|████▉   | 96713/154893 [02:00<00:39, 1484.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 96862/154893 [02:00<00:40, 1433.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 97006/154893 [02:00<00:41, 1386.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 97149/154893 [02:01<00:41, 1397.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 97290/154893 [02:01<00:42, 1341.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 97426/154893 [02:01<00:43, 1318.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 97560/154893 [02:01<00:43, 1319.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 97693/154893 [02:01<00:43, 1301.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 97831/154893 [02:01<00:43, 1315.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 97966/154893 [02:01<00:43, 1309.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 98120/154893 [02:01<00:41, 1375.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  63%|█████   | 98259/154893 [02:01<00:41, 1357.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████   | 98401/154893 [02:02<00:41, 1363.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████   | 98538/154893 [02:02<00:42, 1329.21 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████   | 98677/154893 [02:02<00:42, 1337.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████   | 98818/154893 [02:02<00:41, 1345.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████   | 98970/154893 [02:02<00:40, 1393.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████   | 99111/154893 [02:02<00:40, 1382.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████▏  | 99250/154893 [02:02<00:40, 1382.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████▏  | 99390/154893 [02:02<00:40, 1378.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████▏  | 99534/154893 [02:02<00:39, 1394.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████▏  | 99674/154893 [02:02<00:39, 1395.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  64%|█████▏  | 99815/154893 [02:03<00:39, 1380.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|█████▏  | 99956/154893 [02:03<00:39, 1383.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|████▌  | 100101/154893 [02:03<00:39, 1400.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|████▌  | 100242/154893 [02:03<00:39, 1390.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|████▌  | 100383/154893 [02:03<00:39, 1382.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|████▌  | 100528/154893 [02:03<00:38, 1399.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|████▌  | 100735/154893 [02:03<00:33, 1593.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|████▌  | 100895/154893 [02:03<00:36, 1464.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|████▌  | 101044/154893 [02:03<00:37, 1431.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|████▌  | 101190/154893 [02:04<00:38, 1394.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  65%|████▌  | 101332/154893 [02:04<00:38, 1379.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▌  | 101472/154893 [02:04<00:40, 1328.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▌  | 101606/154893 [02:04<00:40, 1308.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▌  | 101738/154893 [02:04<00:41, 1278.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▌  | 101867/154893 [02:04<00:41, 1276.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▌  | 101996/154893 [02:04<00:42, 1235.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▌  | 102133/154893 [02:04<00:41, 1273.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▌  | 102262/154893 [02:04<00:41, 1275.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▋  | 102391/154893 [02:04<00:41, 1259.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▋  | 102518/154893 [02:05<00:42, 1237.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▋  | 102650/154893 [02:05<00:41, 1255.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▋  | 102777/154893 [02:05<00:42, 1232.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  66%|████▋  | 102901/154893 [02:05<00:42, 1223.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 103025/154893 [02:05<00:43, 1200.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 103157/154893 [02:05<00:42, 1226.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 103280/154893 [02:05<00:42, 1205.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 103405/154893 [02:05<00:42, 1207.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 103527/154893 [02:05<00:42, 1201.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 103648/154893 [02:06<00:44, 1158.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 103777/154893 [02:06<00:42, 1194.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 103907/154893 [02:06<00:42, 1212.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 104032/154893 [02:06<00:42, 1208.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 104165/154893 [02:06<00:40, 1240.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 104294/154893 [02:06<00:40, 1250.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  67%|████▋  | 104420/154893 [02:06<00:40, 1252.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▋  | 104568/154893 [02:06<00:38, 1308.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▋  | 104716/154893 [02:06<00:36, 1358.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▋  | 104899/154893 [02:06<00:33, 1488.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▋  | 105089/154893 [02:07<00:30, 1610.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▊  | 105251/154893 [02:07<00:31, 1569.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▊  | 105410/154893 [02:07<00:32, 1500.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▊  | 105561/154893 [02:07<00:33, 1476.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▊  | 105710/154893 [02:07<00:34, 1410.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▊  | 105852/154893 [02:07<00:34, 1403.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  68%|████▊  | 105993/154893 [02:07<00:35, 1377.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 106148/154893 [02:07<00:34, 1409.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 106291/154893 [02:07<00:34, 1389.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 106447/154893 [02:08<00:34, 1424.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 106597/154893 [02:08<00:33, 1443.37 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 106754/154893 [02:08<00:32, 1464.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 106930/154893 [02:08<00:30, 1548.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 107095/154893 [02:08<00:30, 1566.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 107252/154893 [02:08<00:31, 1515.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 107405/154893 [02:08<00:31, 1505.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  69%|████▊  | 107557/154893 [02:08<00:33, 1428.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▊  | 107701/154893 [02:08<00:33, 1399.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▊  | 107842/154893 [02:08<00:33, 1388.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 107982/154893 [02:09<00:34, 1359.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 108124/154893 [02:09<00:34, 1373.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 108262/154893 [02:09<00:35, 1322.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 108396/154893 [02:09<00:36, 1277.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 108526/154893 [02:09<00:38, 1217.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 108652/154893 [02:09<00:38, 1200.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 108774/154893 [02:09<00:40, 1148.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 108890/154893 [02:09<00:41, 1117.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 109020/154893 [02:09<00:39, 1161.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  70%|████▉  | 109161/154893 [02:10<00:37, 1230.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 109304/154893 [02:10<00:35, 1280.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 109450/154893 [02:10<00:34, 1317.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 109587/154893 [02:10<00:34, 1308.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 109737/154893 [02:10<00:33, 1353.37 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 109874/154893 [02:10<00:35, 1263.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 110005/154893 [02:10<00:36, 1237.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 110143/154893 [02:10<00:35, 1276.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 110274/154893 [02:10<00:36, 1232.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 110399/154893 [02:11<00:36, 1230.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|████▉  | 110524/154893 [02:11<00:37, 1192.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  71%|█████  | 110644/154893 [02:11<00:37, 1176.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 110768/154893 [02:11<00:37, 1170.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 110892/154893 [02:11<00:37, 1188.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 111014/154893 [02:11<00:37, 1165.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 111132/154893 [02:11<00:37, 1163.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 111250/154893 [02:11<00:37, 1160.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 111389/154893 [02:11<00:35, 1221.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 111513/154893 [02:11<00:35, 1225.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 111646/154893 [02:12<00:34, 1249.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 111783/154893 [02:12<00:33, 1281.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 111915/154893 [02:12<00:34, 1230.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 112040/154893 [02:12<00:35, 1211.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  72%|█████  | 112167/154893 [02:12<00:34, 1223.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████  | 112317/154893 [02:12<00:32, 1292.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████  | 112473/154893 [02:12<00:31, 1361.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████  | 112613/154893 [02:12<00:30, 1365.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████  | 112751/154893 [02:12<00:33, 1276.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████  | 112881/154893 [02:13<00:33, 1262.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████  | 113011/154893 [02:13<00:33, 1236.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████  | 113139/154893 [02:13<00:34, 1219.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████  | 113274/154893 [02:13<00:33, 1250.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████▏ | 113441/154893 [02:13<00:30, 1369.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████▏ | 113580/154893 [02:13<00:30, 1351.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  73%|█████▏ | 113717/154893 [02:13<00:32, 1269.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 113848/154893 [02:13<00:34, 1206.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 113971/154893 [02:13<00:34, 1176.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 114094/154893 [02:14<00:34, 1190.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 114215/154893 [02:14<00:34, 1166.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 114335/154893 [02:14<00:34, 1175.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 114456/154893 [02:14<00:34, 1182.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 114576/154893 [02:14<00:34, 1167.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 114694/154893 [02:14<00:35, 1138.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 114810/154893 [02:14<00:35, 1135.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 114924/154893 [02:14<00:35, 1116.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 115039/154893 [02:14<00:35, 1110.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 115160/154893 [02:14<00:35, 1132.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 115275/154893 [02:15<00:35, 1113.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  74%|█████▏ | 115389/154893 [02:15<00:35, 1117.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▏ | 115522/154893 [02:15<00:33, 1177.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▏ | 115648/154893 [02:15<00:32, 1197.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▏ | 115772/154893 [02:15<00:32, 1205.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▏ | 115970/154893 [02:15<00:27, 1430.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▏ | 116114/154893 [02:15<00:27, 1386.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▎ | 116256/154893 [02:15<00:27, 1394.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▎ | 116396/154893 [02:15<00:29, 1287.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▎ | 116528/154893 [02:16<00:31, 1226.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▎ | 116653/154893 [02:16<00:32, 1171.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▎ | 116773/154893 [02:16<00:33, 1143.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  75%|█████▎ | 116889/154893 [02:16<00:33, 1125.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 117003/154893 [02:16<00:33, 1126.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 117159/154893 [02:16<00:30, 1245.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 117285/154893 [02:16<00:30, 1213.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 117409/154893 [02:16<00:31, 1198.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 117531/154893 [02:16<00:32, 1153.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 117647/154893 [02:17<00:32, 1154.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 117763/154893 [02:17<00:32, 1128.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 117877/154893 [02:17<00:32, 1124.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 118003/154893 [02:17<00:31, 1163.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 118126/154893 [02:17<00:31, 1167.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 118244/154893 [02:17<00:31, 1168.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 118366/154893 [02:17<00:30, 1181.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  76%|█████▎ | 118486/154893 [02:17<00:31, 1171.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▎ | 118604/154893 [02:17<00:31, 1157.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▎ | 118728/154893 [02:17<00:30, 1172.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▎ | 118849/154893 [02:18<00:30, 1178.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▍ | 118971/154893 [02:18<00:30, 1190.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▍ | 119091/154893 [02:18<00:30, 1175.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▍ | 119225/154893 [02:18<00:29, 1200.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▍ | 119347/154893 [02:18<00:31, 1128.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▍ | 119467/154893 [02:18<00:30, 1145.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▍ | 119585/154893 [02:18<00:30, 1141.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▍ | 119700/154893 [02:18<00:31, 1128.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▍ | 119837/154893 [02:18<00:29, 1193.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  77%|█████▍ | 119957/154893 [02:18<00:29, 1190.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 120077/154893 [02:19<00:30, 1142.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 120211/154893 [02:19<00:28, 1197.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 120333/154893 [02:19<00:29, 1177.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 120452/154893 [02:19<00:29, 1161.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 120570/154893 [02:19<00:30, 1111.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 120687/154893 [02:19<00:30, 1109.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 120799/154893 [02:19<00:31, 1082.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 120917/154893 [02:19<00:30, 1098.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 121030/154893 [02:19<00:31, 1090.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 121159/154893 [02:20<00:30, 1123.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 121292/154893 [02:20<00:28, 1181.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  78%|█████▍ | 121468/154893 [02:20<00:24, 1340.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▍ | 121603/154893 [02:20<00:25, 1298.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 121769/154893 [02:20<00:23, 1400.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 121911/154893 [02:20<00:25, 1294.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 122043/154893 [02:20<00:26, 1224.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 122169/154893 [02:20<00:27, 1183.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 122294/154893 [02:20<00:28, 1161.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 122486/154893 [02:21<00:23, 1358.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 122624/154893 [02:21<00:24, 1310.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 122757/154893 [02:21<00:25, 1272.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 122886/154893 [02:21<00:26, 1229.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 123010/154893 [02:21<00:25, 1231.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  79%|█████▌ | 123134/154893 [02:21<00:26, 1196.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▌ | 123270/154893 [02:21<00:25, 1241.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▌ | 123412/154893 [02:21<00:24, 1288.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▌ | 123616/154893 [02:21<00:20, 1494.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▌ | 123767/154893 [02:22<00:21, 1473.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▌ | 123915/154893 [02:22<00:21, 1425.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▌ | 124059/154893 [02:22<00:23, 1336.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▌ | 124195/154893 [02:22<00:23, 1285.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▌ | 124326/154893 [02:22<00:24, 1237.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▌ | 124452/154893 [02:22<00:24, 1226.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  80%|█████▋ | 124577/154893 [02:22<00:25, 1196.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|█████▋ | 124698/154893 [02:22<00:25, 1190.37 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|█████▋ | 124818/154893 [02:22<00:25, 1171.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|█████▋ | 124937/154893 [02:23<00:26, 1129.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|█████▋ | 125051/154893 [02:23<00:27, 1102.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|█████▋ | 125163/154893 [02:23<00:27, 1076.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|█████▋ | 125273/154893 [02:23<00:28, 1038.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|█████▋ | 125379/154893 [02:23<00:28, 1021.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|██████▍ | 125482/154893 [02:23<00:29, 991.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|██████▍ | 125582/154893 [02:23<00:30, 968.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|██████▍ | 125679/154893 [02:23<00:31, 915.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|██████▍ | 125772/154893 [02:23<00:32, 902.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|██████▌ | 125884/154893 [02:24<00:30, 947.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|██████▌ | 125989/154893 [02:24<00:30, 950.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|██████▌ | 126086/154893 [02:24<00:30, 951.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  81%|██████▌ | 126182/154893 [02:24<00:31, 918.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 126284/154893 [02:24<00:30, 946.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 126388/154893 [02:24<00:29, 972.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 126486/154893 [02:24<00:29, 957.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 126583/154893 [02:24<00:29, 952.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 126680/154893 [02:24<00:29, 956.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 126778/154893 [02:24<00:29, 954.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 126879/154893 [02:25<00:28, 967.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 126977/154893 [02:25<00:28, 964.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 127076/154893 [02:25<00:28, 967.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 127174/154893 [02:25<00:28, 967.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 127271/154893 [02:25<00:28, 964.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 127368/154893 [02:25<00:29, 942.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 127467/154893 [02:25<00:28, 955.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|██████▌ | 127572/154893 [02:25<00:27, 982.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  82%|█████▊ | 127683/154893 [02:25<00:26, 1016.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 127787/154893 [02:25<00:26, 1022.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 127903/154893 [02:26<00:25, 1062.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 128010/154893 [02:26<00:25, 1039.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 128135/154893 [02:26<00:24, 1100.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 128247/154893 [02:26<00:24, 1088.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 128369/154893 [02:26<00:23, 1106.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 128480/154893 [02:26<00:24, 1098.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 128602/154893 [02:26<00:23, 1117.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 128714/154893 [02:26<00:23, 1110.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 128827/154893 [02:26<00:24, 1084.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 128937/154893 [02:27<00:24, 1073.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 129045/154893 [02:27<00:24, 1060.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 129160/154893 [02:27<00:23, 1077.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  83%|█████▊ | 129268/154893 [02:27<00:23, 1075.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▊ | 129376/154893 [02:27<00:24, 1028.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▊ | 129495/154893 [02:27<00:23, 1065.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▊ | 129617/154893 [02:27<00:22, 1109.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▊ | 129764/154893 [02:27<00:20, 1210.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▊ | 129888/154893 [02:27<00:20, 1202.86 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▉ | 130035/154893 [02:27<00:19, 1269.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▉ | 130163/154893 [02:28<00:20, 1191.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▉ | 130289/154893 [02:28<00:20, 1210.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▉ | 130426/154893 [02:28<00:19, 1255.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▉ | 130558/154893 [02:28<00:19, 1273.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▉ | 130693/154893 [02:28<00:19, 1258.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  84%|█████▉ | 130822/154893 [02:28<00:20, 1162.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|█████▉ | 130940/154893 [02:28<00:22, 1086.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|█████▉ | 131052/154893 [02:28<00:22, 1051.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|█████▉ | 131165/154893 [02:28<00:22, 1058.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|█████▉ | 131273/154893 [02:29<00:23, 1014.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|█████▉ | 131376/154893 [02:29<00:23, 1006.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 131479/154893 [02:29<00:24, 967.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 131578/154893 [02:29<00:23, 971.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 131677/154893 [02:29<00:24, 929.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 131771/154893 [02:29<00:26, 876.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 131866/154893 [02:29<00:25, 894.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 131957/154893 [02:29<00:26, 867.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 132053/154893 [02:29<00:25, 893.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 132158/154893 [02:30<00:24, 929.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 132252/154893 [02:30<00:24, 914.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  85%|██████▊ | 132355/154893 [02:30<00:24, 935.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████▊ | 132457/154893 [02:30<00:23, 949.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████▊ | 132553/154893 [02:30<00:23, 940.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████▊ | 132655/154893 [02:30<00:23, 952.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████▊ | 132753/154893 [02:30<00:23, 957.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████▊ | 132856/154893 [02:30<00:23, 940.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████▊ | 132964/154893 [02:30<00:23, 924.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████ | 133086/154893 [02:31<00:21, 1002.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████ | 133202/154893 [02:31<00:20, 1037.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████ | 133327/154893 [02:31<00:19, 1091.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████ | 133439/154893 [02:31<00:20, 1063.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████ | 133568/154893 [02:31<00:19, 1116.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████ | 133709/154893 [02:31<00:17, 1196.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████ | 133831/154893 [02:31<00:17, 1186.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  86%|██████ | 133952/154893 [02:31<00:19, 1101.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████ | 134068/154893 [02:31<00:18, 1107.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████ | 134182/154893 [02:32<00:19, 1052.21 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████ | 134291/154893 [02:32<00:19, 1056.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████ | 134402/154893 [02:32<00:19, 1064.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████ | 134511/154893 [02:32<00:20, 1010.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 134613/154893 [02:32<00:21, 961.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 134710/154893 [02:32<00:20, 961.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 134808/154893 [02:32<00:21, 927.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 134907/154893 [02:32<00:21, 936.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 135003/154893 [02:32<00:21, 912.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 135097/154893 [02:33<00:21, 919.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 135196/154893 [02:33<00:21, 935.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 135291/154893 [02:33<00:20, 935.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 135386/154893 [02:33<00:21, 925.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  87%|██████▉ | 135497/154893 [02:33<00:19, 977.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|██████▏| 135623/154893 [02:33<00:18, 1059.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|██████▏| 135759/154893 [02:33<00:16, 1140.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|██████▏| 135875/154893 [02:33<00:17, 1076.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|██████▏| 135984/154893 [02:33<00:17, 1050.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|██████▏| 136090/154893 [02:33<00:17, 1045.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|██████▏| 136195/154893 [02:34<00:18, 1008.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|███████ | 136298/154893 [02:34<00:18, 986.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|███████ | 136399/154893 [02:34<00:18, 974.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|███████ | 136512/154893 [02:34<00:18, 985.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|██████▏| 136632/154893 [02:34<00:17, 1039.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|██████▏| 136746/154893 [02:34<00:17, 1058.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|██████▏| 136853/154893 [02:34<00:17, 1030.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|███████ | 136958/154893 [02:34<00:18, 958.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  88%|███████ | 137056/154893 [02:34<00:19, 916.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137149/154893 [02:35<00:20, 881.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137238/154893 [02:35<00:20, 849.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137324/154893 [02:35<00:21, 829.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137419/154893 [02:35<00:20, 861.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137506/154893 [02:35<00:21, 824.06 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137591/154893 [02:35<00:20, 825.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137682/154893 [02:35<00:20, 846.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137769/154893 [02:35<00:20, 833.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137862/154893 [02:35<00:19, 852.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████ | 137949/154893 [02:36<00:19, 856.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████▏| 138042/154893 [02:36<00:19, 876.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████▏| 138130/154893 [02:36<00:19, 848.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████▏| 138219/154893 [02:36<00:19, 859.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████▏| 138306/154893 [02:36<00:19, 856.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████▏| 138392/154893 [02:36<00:19, 851.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████▏| 138479/154893 [02:36<00:19, 827.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  89%|███████▏| 138562/154893 [02:36<00:20, 802.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 138644/154893 [02:36<00:20, 778.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 138730/154893 [02:36<00:20, 799.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 138812/154893 [02:37<00:20, 780.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 138896/154893 [02:37<00:20, 788.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 138977/154893 [02:37<00:20, 787.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139056/154893 [02:37<00:20, 768.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139136/154893 [02:37<00:20, 776.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139222/154893 [02:37<00:19, 799.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139305/154893 [02:37<00:19, 797.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139389/154893 [02:37<00:19, 806.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139470/154893 [02:37<00:19, 798.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139550/154893 [02:38<00:19, 780.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139640/154893 [02:38<00:18, 815.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139722/154893 [02:38<00:19, 777.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139809/154893 [02:38<00:18, 797.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139893/154893 [02:38<00:18, 804.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 139978/154893 [02:38<00:18, 808.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 140059/154893 [02:38<00:18, 797.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  90%|███████▏| 140144/154893 [02:38<00:18, 807.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▏| 140227/154893 [02:38<00:18, 811.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▏| 140310/154893 [02:38<00:18, 802.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 140413/154893 [02:39<00:16, 861.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 140521/154893 [02:39<00:15, 906.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 140628/154893 [02:39<00:15, 935.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 140732/154893 [02:39<00:14, 962.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 140832/154893 [02:39<00:14, 971.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 140931/154893 [02:39<00:15, 913.37 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141024/154893 [02:39<00:16, 864.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141113/154893 [02:39<00:16, 813.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141198/154893 [02:39<00:16, 822.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141283/154893 [02:40<00:17, 765.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141361/154893 [02:40<00:18, 738.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141436/154893 [02:40<00:20, 661.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141505/154893 [02:40<00:20, 651.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141573/154893 [02:40<00:21, 629.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141637/154893 [02:40<00:21, 617.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  91%|███████▎| 141703/154893 [02:40<00:21, 626.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 141768/154893 [02:40<00:21, 621.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 141837/154893 [02:40<00:20, 635.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 141902/154893 [02:41<00:20, 628.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 141966/154893 [02:41<00:20, 625.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142031/154893 [02:41<00:20, 629.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142095/154893 [02:41<00:20, 623.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142163/154893 [02:41<00:20, 635.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142229/154893 [02:41<00:19, 641.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142294/154893 [02:41<00:20, 618.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142424/154893 [02:41<00:15, 809.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142523/154893 [02:41<00:14, 833.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142608/154893 [02:42<00:15, 777.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142688/154893 [02:42<00:16, 722.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▎| 142762/154893 [02:42<00:17, 696.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▍| 142834/154893 [02:42<00:17, 686.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▍| 142904/154893 [02:42<00:18, 660.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▍| 142972/154893 [02:42<00:18, 644.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▍| 143038/154893 [02:42<00:19, 615.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▍| 143113/154893 [02:42<00:18, 641.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▍| 143179/154893 [02:43<00:18, 620.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  92%|███████▍| 143250/154893 [02:43<00:18, 642.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143316/154893 [02:43<00:18, 631.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143388/154893 [02:43<00:17, 649.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143464/154893 [02:43<00:16, 680.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143534/154893 [02:43<00:16, 684.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143604/154893 [02:43<00:16, 679.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143673/154893 [02:43<00:16, 680.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143743/154893 [02:43<00:16, 658.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143810/154893 [02:43<00:17, 644.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143876/154893 [02:44<00:17, 646.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 143943/154893 [02:44<00:16, 648.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144009/154893 [02:44<00:17, 637.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144075/154893 [02:44<00:16, 638.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144139/154893 [02:44<00:17, 621.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144202/154893 [02:44<00:17, 613.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144266/154893 [02:44<00:17, 618.88 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144335/154893 [02:44<00:17, 616.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144404/154893 [02:44<00:16, 633.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144468/154893 [02:44<00:16, 629.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144534/154893 [02:45<00:16, 634.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144598/154893 [02:45<00:16, 618.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144665/154893 [02:45<00:16, 625.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144733/154893 [02:45<00:15, 635.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  93%|███████▍| 144798/154893 [02:45<00:15, 637.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▍| 144863/154893 [02:45<00:15, 629.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▍| 144928/154893 [02:45<00:15, 627.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▍| 145023/154893 [02:45<00:13, 720.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▍| 145113/154893 [02:45<00:12, 769.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145243/154893 [02:46<00:10, 925.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145348/154893 [02:46<00:09, 960.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145446/154893 [02:46<00:11, 814.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145533/154893 [02:46<00:12, 732.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145612/154893 [02:46<00:13, 673.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145683/154893 [02:46<00:13, 658.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145752/154893 [02:46<00:13, 662.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145821/154893 [02:46<00:13, 649.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145888/154893 [02:47<00:14, 641.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 145953/154893 [02:47<00:14, 619.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 146017/154893 [02:47<00:14, 611.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 146079/154893 [02:47<00:14, 596.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 146139/154893 [02:47<00:14, 584.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 146204/154893 [02:47<00:14, 598.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 146265/154893 [02:47<00:14, 583.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  94%|███████▌| 146324/154893 [02:47<00:14, 576.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146383/154893 [02:47<00:15, 556.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146439/154893 [02:48<00:15, 554.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146496/154893 [02:48<00:15, 539.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146552/154893 [02:48<00:15, 537.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146607/154893 [02:48<00:15, 529.56 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146661/154893 [02:48<00:15, 526.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146716/154893 [02:48<00:15, 524.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146776/154893 [02:48<00:15, 540.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146832/154893 [02:48<00:14, 544.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146893/154893 [02:48<00:14, 550.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 146959/154893 [02:48<00:13, 579.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147018/154893 [02:49<00:14, 549.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147075/154893 [02:49<00:14, 529.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147133/154893 [02:49<00:14, 541.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147188/154893 [02:49<00:14, 523.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147242/154893 [02:49<00:14, 521.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147296/154893 [02:49<00:14, 521.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147350/154893 [02:49<00:15, 499.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147406/154893 [02:49<00:14, 506.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147459/154893 [02:49<00:14, 513.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147512/154893 [02:50<00:14, 513.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▌| 147594/154893 [02:50<00:12, 596.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▋| 147675/154893 [02:50<00:10, 657.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▋| 147756/154893 [02:50<00:10, 701.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▋| 147827/154893 [02:50<00:10, 702.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  95%|███████▋| 147899/154893 [02:50<00:10, 659.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 147967/154893 [02:50<00:11, 623.83 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148032/154893 [02:50<00:11, 585.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148093/154893 [02:50<00:12, 553.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148152/154893 [02:51<00:12, 548.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148208/154893 [02:51<00:12, 532.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148263/154893 [02:51<00:12, 521.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148317/154893 [02:51<00:12, 522.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148370/154893 [02:51<00:12, 504.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148424/154893 [02:51<00:12, 508.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148476/154893 [02:51<00:12, 510.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148528/154893 [02:51<00:12, 510.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148580/154893 [02:51<00:12, 503.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148634/154893 [02:52<00:12, 505.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148691/154893 [02:52<00:11, 521.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148767/154893 [02:52<00:10, 578.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148849/154893 [02:52<00:09, 642.12 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148914/154893 [02:52<00:09, 639.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 148987/154893 [02:52<00:08, 656.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 149053/154893 [02:52<00:09, 605.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 149115/154893 [02:52<00:10, 553.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 149173/154893 [02:52<00:10, 533.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 149228/154893 [02:53<00:10, 529.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 149282/154893 [02:53<00:10, 526.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 149336/154893 [02:53<00:10, 508.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 149390/154893 [02:53<00:10, 516.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  96%|███████▋| 149442/154893 [02:53<00:10, 506.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149494/154893 [02:53<00:10, 494.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149544/154893 [02:53<00:10, 490.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149595/154893 [02:53<00:10, 485.37 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149644/154893 [02:53<00:10, 481.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149694/154893 [02:54<00:11, 464.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149742/154893 [02:54<00:11, 448.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149791/154893 [02:54<00:11, 457.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149838/154893 [02:54<00:11, 447.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149884/154893 [02:54<00:11, 444.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149930/154893 [02:54<00:11, 446.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 149983/154893 [02:54<00:10, 462.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▋| 150030/154893 [02:54<00:10, 460.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150078/154893 [02:54<00:10, 452.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150124/154893 [02:54<00:10, 435.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150168/154893 [02:55<00:10, 436.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150212/154893 [02:55<00:11, 422.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150258/154893 [02:55<00:10, 430.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150302/154893 [02:55<00:11, 413.72 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150344/154893 [02:55<00:11, 403.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150386/154893 [02:55<00:11, 401.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150428/154893 [02:55<00:11, 391.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150470/154893 [02:55<00:11, 389.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150510/154893 [02:55<00:11, 378.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150548/154893 [02:56<00:11, 372.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150587/154893 [02:56<00:12, 358.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150623/154893 [02:56<00:12, 338.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150659/154893 [02:56<00:13, 317.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150693/154893 [02:56<00:13, 300.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150725/154893 [02:56<00:14, 289.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150755/154893 [02:56<00:14, 290.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150785/154893 [02:56<00:14, 290.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150816/154893 [02:57<00:15, 270.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150844/154893 [02:57<00:15, 266.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150872/154893 [02:57<00:15, 265.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150902/154893 [02:57<00:14, 266.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150930/154893 [02:57<00:14, 267.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150960/154893 [02:57<00:15, 262.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  97%|███████▊| 150992/154893 [02:57<00:14, 263.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151022/154893 [02:57<00:14, 272.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151050/154893 [02:57<00:14, 269.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151077/154893 [02:58<00:14, 262.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151111/154893 [02:58<00:13, 284.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151145/154893 [02:58<00:12, 299.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151177/154893 [02:58<00:12, 299.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151211/154893 [02:58<00:11, 308.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151245/154893 [02:58<00:11, 315.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151277/154893 [02:58<00:11, 315.75 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151309/154893 [02:58<00:11, 314.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151343/154893 [02:58<00:11, 307.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151374/154893 [02:58<00:11, 306.81 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151405/154893 [02:59<00:11, 302.30 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151437/154893 [02:59<00:11, 306.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151468/154893 [02:59<00:11, 291.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151499/154893 [02:59<00:11, 291.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151529/154893 [02:59<00:11, 290.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151559/154893 [02:59<00:11, 286.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151591/154893 [02:59<00:11, 287.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151625/154893 [02:59<00:10, 299.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151665/154893 [02:59<00:10, 319.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151700/154893 [03:00<00:09, 325.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151737/154893 [03:00<00:09, 334.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151771/154893 [03:00<00:09, 333.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151817/154893 [03:00<00:08, 359.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151859/154893 [03:00<00:08, 365.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151899/154893 [03:00<00:08, 366.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151941/154893 [03:00<00:07, 379.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 151979/154893 [03:00<00:07, 365.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 152026/154893 [03:00<00:07, 394.94 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 152094/154893 [03:00<00:05, 470.87 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 152196/154893 [03:01<00:04, 627.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 152264/154893 [03:01<00:04, 629.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 152329/154893 [03:01<00:04, 547.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 152386/154893 [03:01<00:05, 499.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▊| 152438/154893 [03:01<00:05, 438.43 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▉| 152486/154893 [03:01<00:05, 410.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  98%|███████▉| 152530/154893 [03:01<00:06, 390.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152571/154893 [03:02<00:06, 366.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152609/154893 [03:02<00:06, 353.41 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152647/154893 [03:02<00:06, 355.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152683/154893 [03:02<00:06, 348.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152719/154893 [03:02<00:06, 344.40 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152757/154893 [03:02<00:06, 345.99 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152793/154893 [03:02<00:06, 339.04 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152830/154893 [03:02<00:06, 340.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152869/154893 [03:02<00:05, 345.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152905/154893 [03:03<00:06, 328.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152939/154893 [03:03<00:06, 315.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 152971/154893 [03:03<00:06, 294.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153001/154893 [03:03<00:06, 290.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153031/154893 [03:03<00:06, 287.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153061/154893 [03:03<00:06, 282.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153090/154893 [03:03<00:06, 277.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153120/154893 [03:03<00:06, 275.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153150/154893 [03:03<00:06, 282.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153180/154893 [03:04<00:06, 269.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153211/154893 [03:04<00:06, 279.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153240/154893 [03:04<00:06, 273.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153268/154893 [03:04<00:06, 263.55 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153299/154893 [03:04<00:05, 267.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153329/154893 [03:04<00:05, 275.66 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153358/154893 [03:04<00:05, 270.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153386/154893 [03:04<00:05, 269.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153414/154893 [03:04<00:05, 264.86 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153446/154893 [03:05<00:05, 277.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153474/154893 [03:05<00:05, 277.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153502/154893 [03:05<00:05, 266.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153530/154893 [03:05<00:05, 269.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153558/154893 [03:05<00:04, 268.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153588/154893 [03:05<00:04, 266.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153620/154893 [03:05<00:04, 278.93 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153648/154893 [03:05<00:04, 270.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153677/154893 [03:05<00:04, 267.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153705/154893 [03:05<00:04, 265.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153733/154893 [03:06<00:04, 255.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153759/154893 [03:06<00:04, 246.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153785/154893 [03:06<00:04, 249.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153811/154893 [03:06<00:04, 243.98 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153837/154893 [03:06<00:04, 242.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153863/154893 [03:06<00:04, 236.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153893/154893 [03:06<00:04, 244.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153918/154893 [03:06<00:04, 241.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153946/154893 [03:07<00:03, 245.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153974/154893 [03:07<00:03, 243.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 153999/154893 [03:07<00:03, 235.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 154023/154893 [03:07<00:03, 222.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 154047/154893 [03:07<00:04, 202.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 154069/154893 [03:07<00:04, 189.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 154089/154893 [03:07<00:04, 162.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64):  99%|███████▉| 154107/154893 [03:07<00:05, 155.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154123/154893 [03:08<00:04, 156.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154139/154893 [03:08<00:05, 146.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154155/154893 [03:08<00:04, 148.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154173/154893 [03:08<00:04, 146.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154199/154893 [03:08<00:03, 174.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154217/154893 [03:08<00:03, 169.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154235/154893 [03:08<00:04, 160.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154252/154893 [03:08<00:04, 152.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154270/154893 [03:08<00:03, 158.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154288/154893 [03:09<00:03, 151.25 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154306/154893 [03:09<00:03, 156.54 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154322/154893 [03:09<00:03, 149.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154339/154893 [03:09<00:03, 144.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154357/154893 [03:09<00:03, 144.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154377/154893 [03:09<00:03, 145.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154396/154893 [03:09<00:03, 146.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154414/154893 [03:09<00:03, 146.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154432/154893 [03:10<00:02, 154.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154448/154893 [03:10<00:02, 153.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154464/154893 [03:10<00:02, 143.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154481/154893 [03:10<00:02, 147.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154497/154893 [03:10<00:02, 147.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154512/154893 [03:10<00:02, 145.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154528/154893 [03:10<00:02, 134.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154542/154893 [03:10<00:02, 134.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154556/154893 [03:11<00:02, 127.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154570/154893 [03:11<00:02, 120.44 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154586/154893 [03:11<00:02, 118.97 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154602/154893 [03:11<00:02, 117.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154618/154893 [03:11<00:02, 116.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154633/154893 [03:11<00:02, 116.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|��██████▉| 154646/154893 [03:11<00:02, 114.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154658/154893 [03:11<00:02, 112.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154670/154893 [03:12<00:02, 108.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|███████▉| 154682/154893 [03:12<00:02, 103.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154694/154893 [03:12<00:02, 99.05 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154704/154893 [03:12<00:01, 98.36 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154715/154893 [03:12<00:01, 90.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154725/154893 [03:12<00:02, 82.58 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154735/154893 [03:12<00:01, 82.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154747/154893 [03:12<00:01, 83.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154759/154893 [03:13<00:01, 83.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154771/154893 [03:13<00:01, 84.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154781/154893 [03:13<00:01, 84.23 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154794/154893 [03:13<00:01, 84.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154806/154893 [03:13<00:01, 84.37 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154817/154893 [03:13<00:00, 85.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154829/154893 [03:13<00:00, 86.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154839/154893 [03:14<00:00, 89.76 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154849/154893 [03:14<00:00, 87.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154859/154893 [03:14<00:00, 76.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154867/154893 [03:14<00:00, 71.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154876/154893 [03:14<00:00, 65.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154884/154893 [03:14<00:00, 62.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Map (num_proc=64): 100%|████████▉| 154892/154893 [03:14<00:00, 61.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                                                                                \r"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   0%|                   | 0/154893 [00:00<?, ? examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   1%|       | 1000/154893 [00:02<05:16, 486.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   1%|       | 2000/154893 [00:02<03:02, 838.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   2%|▏      | 3000/154893 [00:03<02:42, 934.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   3%|▏      | 4000/154893 [00:04<02:51, 878.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   3%|▏     | 5000/154893 [00:04<01:54, 1304.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   4%|▏     | 6000/154893 [00:05<01:39, 1501.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   5%|▎     | 7000/154893 [00:05<01:15, 1966.85 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   5%|▎     | 8000/154893 [00:05<00:59, 2481.35 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   6%|▎     | 9000/154893 [00:06<00:53, 2750.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   6%|▎    | 10000/154893 [00:06<00:48, 2983.59 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   7%|▎    | 11000/154893 [00:06<00:55, 2581.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   7%|▎    | 11420/154893 [00:07<00:57, 2477.82 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   8%|▍    | 12420/154893 [00:07<01:16, 1853.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   9%|▍    | 13420/154893 [00:08<01:03, 2232.22 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):   9%|▍    | 14420/154893 [00:08<00:50, 2772.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  12%|▌    | 18420/154893 [00:08<00:27, 4990.95 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  13%|▋    | 20420/154893 [00:09<00:25, 5251.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  14%|▋    | 21841/154893 [00:09<00:24, 5363.57 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  15%|▊    | 23261/154893 [00:10<00:43, 3015.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  16%|▊    | 24682/154893 [00:10<00:40, 3183.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  18%|▉    | 27682/154893 [00:10<00:24, 5225.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  19%|▉    | 29682/154893 [00:10<00:19, 6548.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  20%|█    | 31682/154893 [00:11<00:18, 6843.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  22%|█    | 33682/154893 [00:11<00:24, 4880.08 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  23%|█▏   | 35103/154893 [00:12<00:32, 3683.29 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  24%|█▏   | 37524/154893 [00:12<00:27, 4345.51 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  25%|█▎   | 38945/154893 [00:13<00:24, 4830.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  26%|█▎   | 40945/154893 [00:13<00:23, 4847.19 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  27%|█▎   | 41945/154893 [00:13<00:22, 5004.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  28%|█▍   | 43366/154893 [00:14<00:26, 4213.64 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  30%|█▌   | 46786/154893 [00:14<00:19, 5449.20 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  31%|█▌   | 48786/154893 [00:14<00:16, 6627.27 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  33%|█▋   | 50628/154893 [00:16<00:31, 3330.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  33%|█▋   | 51628/154893 [00:16<00:28, 3598.15 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  36%|█▊   | 55628/154893 [00:16<00:18, 5487.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  37%|█▊   | 56628/154893 [00:16<00:18, 5201.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  37%|█▊   | 57628/154893 [00:17<00:23, 4212.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  38%|█▉   | 59628/154893 [00:17<00:20, 4697.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  39%|█▉   | 61049/154893 [00:17<00:17, 5266.50 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  41%|██   | 63470/154893 [00:18<00:21, 4206.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  42%|██   | 64891/154893 [00:18<00:20, 4448.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  44%|██▏  | 67891/154893 [00:19<00:15, 5799.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  44%|██▏  | 68891/154893 [00:19<00:14, 5903.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  46%|██▎  | 70891/154893 [00:19<00:11, 7008.45 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  46%|██▎  | 71891/154893 [00:19<00:11, 7330.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  47%|██▎  | 73312/154893 [00:19<00:10, 7632.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  48%|██▍  | 74312/154893 [00:19<00:10, 7751.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  49%|██▍  | 75733/154893 [00:20<00:12, 6323.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  50%|██▍  | 77153/154893 [00:20<00:14, 5226.07 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  50%|██▌  | 78153/154893 [00:20<00:13, 5503.39 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  51%|██▌  | 79573/154893 [00:20<00:12, 6106.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  52%|██▌  | 80573/154893 [00:21<00:16, 4585.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  53%|██▋  | 81573/154893 [00:21<00:16, 4341.00 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  54%|██▋  | 83993/154893 [00:21<00:10, 7009.31 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  56%|██▊  | 85993/154893 [00:22<00:10, 6417.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  56%|██▊  | 87413/154893 [00:22<00:13, 4953.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  57%|██▊  | 88413/154893 [00:22<00:12, 5447.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  58%|██▉  | 89413/154893 [00:22<00:14, 4595.67 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  58%|██▉  | 90413/154893 [00:23<00:12, 5296.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  60%|███  | 93253/154893 [00:23<00:08, 7268.92 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  61%|███  | 94673/154893 [00:23<00:07, 7600.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  62%|███  | 95673/154893 [00:23<00:09, 6044.33 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  62%|███  | 96673/154893 [00:23<00:08, 6602.10 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  63%|███▏ | 97673/154893 [00:23<00:08, 6535.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  64%|███▏ | 98673/154893 [00:24<00:12, 4434.91 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  64%|███▏ | 99673/154893 [00:24<00:12, 4429.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  66%|██▋ | 101673/154893 [00:25<00:12, 4367.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  66%|██▋ | 102673/154893 [00:25<00:14, 3523.13 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  67%|██▋ | 104093/154893 [00:25<00:14, 3592.16 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  68%|██▋ | 104933/154893 [00:26<00:16, 3103.28 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  68%|██▋ | 105353/154893 [00:27<00:24, 2018.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  69%|██▋ | 106353/154893 [00:27<00:29, 1623.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  70%|██▊ | 108353/154893 [00:28<00:18, 2561.14 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  70%|██▊ | 108773/154893 [00:28<00:19, 2311.63 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  71%|██▊ | 109773/154893 [00:28<00:18, 2434.79 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  72%|██▉ | 112193/154893 [00:28<00:09, 4301.52 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  74%|██▉ | 114193/154893 [00:29<00:06, 6059.90 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  75%|██▉ | 116033/154893 [00:30<00:11, 3305.34 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  76%|███ | 117033/154893 [00:30<00:11, 3192.02 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  76%|███ | 118033/154893 [00:31<00:14, 2541.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  77%|███ | 118873/154893 [00:32<00:19, 1868.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  77%|███ | 119293/154893 [00:32<00:19, 1828.96 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  78%|███ | 120713/154893 [00:32<00:15, 2214.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  79%|███▏| 122133/154893 [00:32<00:11, 2957.73 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  80%|███▏| 123553/154893 [00:33<00:11, 2715.18 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  80%|███▏| 123973/154893 [00:33<00:12, 2489.47 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  81%|███▏| 124973/154893 [00:34<00:11, 2601.03 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  81%|███▏| 125393/154893 [00:34<00:11, 2602.78 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  82%|███▎| 127393/154893 [00:34<00:06, 4502.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  84%|███▎| 129393/154893 [00:34<00:03, 6539.38 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  84%|███▎| 130393/154893 [00:34<00:04, 5452.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  85%|███▍| 131813/154893 [00:35<00:04, 5591.80 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  86%|███▍| 132813/154893 [00:35<00:04, 5076.68 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  86%|███▍| 133813/154893 [00:35<00:03, 5587.61 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  87%|███▍| 135233/154893 [00:36<00:05, 3319.65 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  89%|███▌| 137233/154893 [00:36<00:03, 4597.24 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  89%|███▌| 138233/154893 [00:37<00:05, 3260.01 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  90%|███▌| 139073/154893 [00:37<00:04, 3445.26 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  90%|███▌| 140073/154893 [00:37<00:04, 3297.89 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  91%|███▋| 140913/154893 [00:38<00:06, 2314.84 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  92%|███▋| 141913/154893 [00:38<00:06, 2161.74 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  92%|███▋| 142913/154893 [00:39<00:04, 2665.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  93%|███▋| 143913/154893 [00:39<00:03, 3216.11 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  93%|███▋| 144753/154893 [00:39<00:03, 3287.62 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  94%|███▊| 146173/154893 [00:39<00:02, 3129.53 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  95%|███▊| 146593/154893 [00:40<00:02, 2961.77 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  95%|███▊| 147013/154893 [00:40<00:02, 2668.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  95%|███▊| 147433/154893 [00:40<00:03, 2257.09 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  95%|███▊| 147853/154893 [00:40<00:02, 2353.49 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  96%|███▊| 148273/154893 [00:40<00:02, 2259.48 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  97%|███▊| 149693/154893 [00:41<00:01, 2783.71 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  97%|███▉| 150113/154893 [00:41<00:01, 2402.46 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  97%|███▉| 150533/154893 [00:41<00:02, 2102.69 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  98%|███▉| 151953/154893 [00:42<00:01, 1704.37 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  99%|███▉| 152793/154893 [00:43<00:01, 1336.21 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  99%|████▉| 153213/154893 [00:45<00:01, 917.17 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  99%|████▉| 153633/154893 [00:45<00:01, 877.42 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64):  99%|████▉| 154053/154893 [00:46<00:01, 832.70 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64): 100%|████▉| 154473/154893 [00:48<00:00, 470.60 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Filter (num_proc=64): 100%|█████| 154893/154893 [00:48<00:00, 555.32 examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "                                                                                \r"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/8 shards):   0%|         | 0/98542 [00:00<?, ? examples/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/8 shards):   2%| | 2000/98542 [00:00<00:05, 18061.42 examp"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/8 shards):   6%| | 6000/98542 [00:00<00:04, 20465.43 examp"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/8 shards):  10%| | 10000/98542 [00:00<00:04, 21386.26 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (0/8 shards):  13%|▏| 12318/98542 [00:00<00:03, 21781.91 exam\r",
-      "Saving the dataset (1/8 shards):  13%|▏| 12318/98542 [00:00<00:03, 21781.91 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (1/8 shards):  17%|▏| 16318/98542 [00:00<00:03, 22171.01 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (1/8 shards):  21%|▏| 20318/98542 [00:00<00:03, 23162.69 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (1/8 shards):  25%|▏| 24318/98542 [00:01<00:03, 23804.66 exam\r",
-      "Saving the dataset (2/8 shards):  25%|▎| 24636/98542 [00:01<00:03, 23804.66 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (2/8 shards):  29%|▎| 28636/98542 [00:01<00:02, 23306.69 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (2/8 shards):  33%|▎| 32636/98542 [00:01<00:02, 23653.61 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (2/8 shards):  37%|▎| 36636/98542 [00:01<00:02, 23931.20 exam\r",
-      "Saving the dataset (3/8 shards):  38%|▍| 36954/98542 [00:01<00:02, 23931.20 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (3/8 shards):  42%|▍| 40954/98542 [00:01<00:02, 24232.11 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (3/8 shards):  46%|▍| 44954/98542 [00:01<00:02, 24742.01 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (3/8 shards):  50%|▍| 48954/98542 [00:02<00:02, 23551.32 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (4/8 shards):  50%|▌| 49272/98542 [00:02<00:02, 23551.32 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (4/8 shards):  54%|▌| 53272/98542 [00:02<00:01, 24415.79 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (4/8 shards):  58%|▌| 57272/98542 [00:02<00:01, 25477.78 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (4/8 shards):  62%|▌| 61272/98542 [00:02<00:01, 26322.44 exam\r",
-      "Saving the dataset (5/8 shards):  63%|▋| 61590/98542 [00:02<00:01, 26322.44 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (5/8 shards):  67%|▋| 65590/98542 [00:02<00:01, 26727.76 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (5/8 shards):  71%|▋| 69590/98542 [00:02<00:01, 27439.45 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (5/8 shards):  75%|▋| 73590/98542 [00:02<00:00, 28191.01 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (6/8 shards):  75%|▊| 73908/98542 [00:02<00:00, 28191.01 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (6/8 shards):  79%|▊| 77908/98542 [00:03<00:00, 28062.31 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (6/8 shards):  83%|▊| 81908/98542 [00:03<00:00, 28673.78 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (6/8 shards):  87%|▊| 85908/98542 [00:03<00:00, 28266.52 exam\r",
-      "Saving the dataset (7/8 shards):  88%|▉| 86225/98542 [00:03<00:00, 28266.52 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (7/8 shards):  92%|▉| 90225/98542 [00:03<00:00, 28190.54 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (7/8 shards):  96%|▉| 94225/98542 [00:03<00:00, 28171.77 exam"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Saving the dataset (7/8 shards): 100%|▉| 98225/98542 [00:03<00:00, 28544.35 exam\r",
-      "Saving the dataset (8/8 shards): 100%|█| 98542/98542 [00:03<00:00, 28544.35 exam\r",
-      "                                                                                \r",
-      "\r",
-      "Saving the dataset (0/1 shards):   0%|            | 0/99 [00:00<?, ? examples/s]\r",
-      "Saving the dataset (1/1 shards): 100%|█| 99/99 [00:00<00:00, 7155.30 examples/s]\r",
-      "                                                                                \r"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[rank: 0] Global seed set to 3809433325\r\n",
-      "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\r\n",
-      "[2023-09-02 08:47:32,098] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Enabling DeepSpeed BF16.\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
-      "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
-      "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
-      "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
-      "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
-      "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
-      "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
-      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
-      "#\r\n",
-      "# RWKV lighting_trainer.py important notes \r\n",
-      "# https://github.com/RWKV/RWKV-infctx-trainer \r\n",
-      "#\r\n",
-      "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\r\n",
-      "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n",
-      "# - When resuming from checkpoint, the estimated time is inaccurate\r\n",
-      "#\r\n",
-      "\r\n",
-      "[RWKV.model] Configuring optimizer with\r\n",
-      "    - lr_init:  1.000e-04 (0.0001)\r\n",
-      "    - lr_final: 1.000e-04 (0.0001)\r\n",
-      "\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Detected CUDA files, patching ldflags\r\n",
-      "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/fused_adam/build.ninja...\r\n",
-      "Building extension module fused_adam...\r\n",
-      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n",
-      "ninja: no work to do.\r\n",
-      "Loading extension module fused_adam...\r\n",
-      "Time to load fused_adam op: 0.0724024772644043 seconds\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Loading extension module fused_adam...\r\n",
-      "Loading extension module fused_adam...\r\n",
-      "Loading extension module fused_adam...\r\n",
-      "Loading extension module fused_adam...\r\n",
-      "Time to load fused_adam op: 0.10135412216186523 seconds\r\n",
-      "Time to load fused_adam op: 0.1016843318939209 seconds\r\n",
-      "Loading extension module fused_adam...\r\n",
-      "Time to load fused_adam op: 0.10171890258789062 seconds\r\n",
-      "Time to load fused_adam op: 0.10201501846313477 seconds\r\n",
-      "Time to load fused_adam op: 0.10342574119567871 seconds\r\n",
-      "Loading extension module fused_adam...\r\n",
-      "Loading extension module fused_adam...\r\n",
-      "Time to load fused_adam op: 0.11149120330810547 seconds\r\n",
-      "Time to load fused_adam op: 0.1042928695678711 seconds\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/utils/build.ninja...\r\n",
-      "Building extension module utils...\r\n",
-      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n",
-      "ninja: no work to do.\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.0709686279296875 seconds\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.10201740264892578 seconds\r\n",
-      "Loading extension module utils...\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.10213279724121094 seconds\r\n",
-      "Time to load utils op: 0.10248398780822754 seconds\r\n",
-      "Loading extension module utils...\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.10281991958618164 seconds\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.10238265991210938 seconds\r\n",
-      "Time to load utils op: 0.10246157646179199 seconds\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.10242843627929688 seconds\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Rank: 3 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Rank: 5 partition count [8, 8] and sizes[(176584448, False), (384, False)] "
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Rank: 4 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Rank: 6 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Rank: 2 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Rank: 7 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n",
-      "Rank: 1 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Rank: 0 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
-      "Loading extension module utils...\r\n",
-      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
-      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
-      "Loading extension module utils...\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.0006196498870849609 seconds\r\n",
-      "Time to load utils op: 0.0006136894226074219 seconds\r\n",
-      "Time to load utils op: 0.0007157325744628906 seconds\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.0006725788116455078 seconds\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.0008366107940673828 seconds\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.0011620521545410156 seconds\r\n",
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.0012750625610351562 seconds\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
-      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
-      "Loading extension module utils...\r\n",
-      "Time to load utils op: 0.0009191036224365234 seconds\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r\n",
-      "  | Name   | Type       | Params\r\n",
-      "--------------------------------------\r\n",
-      "0 | emb    | Embedding  | 51.5 M\r\n",
-      "1 | blocks | ModuleList | 1.3 B \r\n",
-      "2 | ln_out | LayerNorm  | 2.0 K \r\n",
-      "3 | head   | Linear     | 51.5 M\r\n",
-      "--------------------------------------\r\n",
-      "1.4 B     Trainable params\r\n",
-      "0         Non-trainable params\r\n",
-      "1.4 B     Total params\r\n",
-      "5,650.715 Total estimated model params size (MB)\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Training: 0it [00:00, ?it/s]\r",
-      "Training:   0%|                                       | 0/12318 [00:00<?, ?it/s]\r",
-      "Epoch 0:   0%|                                        | 0/12318 [00:00<?, ?it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%|                             | 1/12318 [00:14<49:34:41, 14.49s/it]\r",
-      "Epoch 0:   0%| | 1/12318 [00:14<49:34:57, 14.49s/it, v_num=e4xv, train/loss=5.16"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 2/12318 [00:21<37:08:25, 10.86s/it, v_num=e4xv, train/loss=5.16\r",
-      "Epoch 0:   0%| | 2/12318 [00:21<37:08:31, 10.86s/it, v_num=e4xv, train/loss=4.91"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 3/12318 [00:28<32:16:47,  9.44s/it, v_num=e4xv, train/loss=4.91\r",
-      "Epoch 0:   0%| | 3/12318 [00:28<32:16:50,  9.44s/it, v_num=e4xv, train/loss=5.22"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 4/12318 [00:34<29:10:24,  8.53s/it, v_num=e4xv, train/loss=5.22\r",
-      "Epoch 0:   0%| | 4/12318 [00:34<29:10:27,  8.53s/it, v_num=e4xv, train/loss=0.13"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 5/12318 [00:39<27:14:19,  7.96s/it, v_num=e4xv, train/loss=0.13\r",
-      "Epoch 0:   0%| | 5/12318 [00:39<27:14:21,  7.96s/it, v_num=e4xv, train/loss=0.60"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 6/12318 [00:45<25:50:12,  7.55s/it, v_num=e4xv, train/loss=0.60\r",
-      "Epoch 0:   0%| | 6/12318 [00:45<25:50:14,  7.55s/it, v_num=e4xv, train/loss=3.02"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 7/12318 [00:48<23:47:51,  6.96s/it, v_num=e4xv, train/loss=3.02\r",
-      "Epoch 0:   0%| | 7/12318 [00:48<23:47:53,  6.96s/it, v_num=e4xv, train/loss=0.15"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 8/12318 [00:52<22:29:22,  6.58s/it, v_num=e4xv, train/loss=0.15\r",
-      "Epoch 0:   0%| | 8/12318 [00:52<22:29:23,  6.58s/it, v_num=e4xv, train/loss=0.24"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 9/12318 [00:54<20:42:54,  6.06s/it, v_num=e4xv, train/loss=0.24\r",
-      "Epoch 0:   0%| | 9/12318 [00:54<20:42:56,  6.06s/it, v_num=e4xv, train/loss=0.01"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 10/12318 [00:56<19:26:57,  5.69s/it, v_num=e4xv, train/loss=0.0\r",
-      "Epoch 0:   0%| | 10/12318 [00:56<19:26:57,  5.69s/it, v_num=e4xv, train/loss=0.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 11/12318 [01:00<18:54:02,  5.53s/it, v_num=e4xv, train/loss=0.0\r",
-      "Epoch 0:   0%| | 11/12318 [01:00<18:54:03,  5.53s/it, v_num=e4xv, train/loss=0.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 12/12318 [01:06<18:54:19,  5.53s/it, v_num=e4xv, train/loss=0.2\r",
-      "Epoch 0:   0%| | 12/12318 [01:06<18:54:20,  5.53s/it, v_num=e4xv, train/loss=3.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 13/12318 [01:11<18:45:45,  5.49s/it, v_num=e4xv, train/loss=3.1\r",
-      "Epoch 0:   0%| | 13/12318 [01:11<18:45:45,  5.49s/it, v_num=e4xv, train/loss=0.8"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 14/12318 [01:14<18:16:33,  5.35s/it, v_num=e4xv, train/loss=0.8\r",
-      "Epoch 0:   0%| | 14/12318 [01:14<18:16:33,  5.35s/it, v_num=e4xv, train/loss=0.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 15/12318 [01:23<19:01:04,  5.56s/it, v_num=e4xv, train/loss=0.1\r",
-      "Epoch 0:   0%| | 15/12318 [01:23<19:01:04,  5.56s/it, v_num=e4xv, train/loss=8.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 16/12318 [01:26<18:26:28,  5.40s/it, v_num=e4xv, train/loss=8.2\r",
-      "Epoch 0:   0%| | 16/12318 [01:26<18:26:29,  5.40s/it, v_num=e4xv, train/loss=0.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 17/12318 [01:33<18:53:03,  5.53s/it, v_num=e4xv, train/loss=0.1\r",
-      "Epoch 0:   0%| | 17/12318 [01:33<18:53:03,  5.53s/it, v_num=e4xv, train/loss=6.9"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 18/12318 [01:42<19:28:49,  5.70s/it, v_num=e4xv, train/loss=6.9\r",
-      "Epoch 0:   0%| | 18/12318 [01:42<19:28:49,  5.70s/it, v_num=e4xv, train/loss=8.5"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 19/12318 [01:45<18:58:13,  5.55s/it, v_num=e4xv, train/loss=8.5\r",
-      "Epoch 0:   0%| | 19/12318 [01:45<18:58:13,  5.55s/it, v_num=e4xv, train/loss=0.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 20/12318 [01:51<18:57:35,  5.55s/it, v_num=e4xv, train/loss=0.1\r",
-      "Epoch 0:   0%| | 20/12318 [01:51<18:57:35,  5.55s/it, v_num=e4xv, train/loss=2.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 21/12318 [01:52<18:21:15,  5.37s/it, v_num=e4xv, train/loss=2.0\r",
-      "Epoch 0:   0%| | 21/12318 [01:52<18:21:15,  5.37s/it, v_num=e4xv, train/loss=0.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 22/12318 [01:59<18:33:22,  5.43s/it, v_num=e4xv, train/loss=0.0\r",
-      "Epoch 0:   0%| | 22/12318 [01:59<18:33:22,  5.43s/it, v_num=e4xv, train/loss=1.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 23/12318 [02:07<18:52:55,  5.53s/it, v_num=e4xv, train/loss=1.1\r",
-      "Epoch 0:   0%| | 23/12318 [02:07<18:52:55,  5.53s/it, v_num=e4xv, train/loss=7.3"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 24/12318 [02:09<18:25:39,  5.40s/it, v_num=e4xv, train/loss=7.3\r",
-      "Epoch 0:   0%| | 24/12318 [02:09<18:25:39,  5.40s/it, v_num=e4xv, train/loss=0.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 25/12318 [02:12<18:08:46,  5.31s/it, v_num=e4xv, train/loss=0.0\r",
-      "Epoch 0:   0%| | 25/12318 [02:12<18:08:46,  5.31s/it, v_num=e4xv, train/loss=0.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 26/12318 [02:16<17:57:50,  5.26s/it, v_num=e4xv, train/loss=0.1\r",
-      "Epoch 0:   0%| | 26/12318 [02:16<17:57:50,  5.26s/it, v_num=e4xv, train/loss=0.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 27/12318 [02:19<17:39:44,  5.17s/it, v_num=e4xv, train/loss=0.2\r",
-      "Epoch 0:   0%| | 27/12318 [02:19<17:39:45,  5.17s/it, v_num=e4xv, train/loss=0.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 28/12318 [02:23<17:30:43,  5.13s/it, v_num=e4xv, train/loss=0.1\r",
-      "Epoch 0:   0%| | 28/12318 [02:23<17:30:43,  5.13s/it, v_num=e4xv, train/loss=0.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 29/12318 [02:28<17:29:02,  5.12s/it, v_num=e4xv, train/loss=0.2\r",
-      "Epoch 0:   0%| | 29/12318 [02:28<17:29:02,  5.12s/it, v_num=e4xv, train/loss=1.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 30/12318 [02:36<17:46:10,  5.21s/it, v_num=e4xv, train/loss=1.0\r",
-      "Epoch 0:   0%| | 30/12318 [02:36<17:46:10,  5.21s/it, v_num=e4xv, train/loss=7.3"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 31/12318 [02:44<18:09:08,  5.32s/it, v_num=e4xv, train/loss=7.3\r",
-      "Epoch 0:   0%| | 31/12318 [02:44<18:09:09,  5.32s/it, v_num=e4xv, train/loss=8.5"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 32/12318 [03:21<21:31:18,  6.31s/it, v_num=e4xv, train/loss=8.5"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 32/12318 [03:24<21:49:48,  6.40s/it, v_num=e4xv, train/loss=7.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 33/12318 [03:32<21:59:26,  6.44s/it, v_num=e4xv, train/loss=7.2\r",
-      "Epoch 0:   0%| | 33/12318 [03:32<21:59:26,  6.44s/it, v_num=e4xv, train/loss=8.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 34/12318 [03:37<21:50:19,  6.40s/it, v_num=e4xv, train/loss=8.1\r",
-      "Epoch 0:   0%| | 34/12318 [03:37<21:50:19,  6.40s/it, v_num=e4xv, train/loss=4.8"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 35/12318 [03:45<21:57:12,  6.43s/it, v_num=e4xv, train/loss=4.8\r",
-      "Epoch 0:   0%| | 35/12318 [03:45<21:57:12,  6.43s/it, v_num=e4xv, train/loss=8.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 36/12318 [03:54<22:13:51,  6.52s/it, v_num=e4xv, train/loss=8.0\r",
-      "Epoch 0:   0%| | 36/12318 [03:54<22:13:51,  6.52s/it, v_num=e4xv, train/loss=8.3"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 37/12318 [04:01<22:13:57,  6.52s/it, v_num=e4xv, train/loss=8.3\r",
-      "Epoch 0:   0%| | 37/12318 [04:01<22:13:57,  6.52s/it, v_num=e4xv, train/loss=7.9"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 38/12318 [04:06<22:08:18,  6.49s/it, v_num=e4xv, train/loss=7.9\r",
-      "Epoch 0:   0%| | 38/12318 [04:06<22:08:18,  6.49s/it, v_num=e4xv, train/loss=6.7"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 39/12318 [04:09<21:49:14,  6.40s/it, v_num=e4xv, train/loss=6.7\r",
-      "Epoch 0:   0%| | 39/12318 [04:09<21:49:14,  6.40s/it, v_num=e4xv, train/loss=1.5"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 40/12318 [04:17<21:56:42,  6.43s/it, v_num=e4xv, train/loss=1.5\r",
-      "Epoch 0:   0%| | 40/12318 [04:17<21:56:42,  6.43s/it, v_num=e4xv, train/loss=8.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 41/12318 [04:22<21:49:39,  6.40s/it, v_num=e4xv, train/loss=8.0\r",
-      "Epoch 0:   0%| | 41/12318 [04:22<21:49:39,  6.40s/it, v_num=e4xv, train/loss=5.3"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 42/12318 [04:26<21:40:27,  6.36s/it, v_num=e4xv, train/loss=5.3\r",
-      "Epoch 0:   0%| | 42/12318 [04:26<21:40:27,  6.36s/it, v_num=e4xv, train/loss=3.4"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 43/12318 [04:35<21:51:48,  6.41s/it, v_num=e4xv, train/loss=3.4\r",
-      "Epoch 0:   0%| | 43/12318 [04:35<21:51:48,  6.41s/it, v_num=e4xv, train/loss=8.3"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 44/12318 [04:43<21:57:20,  6.44s/it, v_num=e4xv, train/loss=8.3\r",
-      "Epoch 0:   0%| | 44/12318 [04:43<21:57:20,  6.44s/it, v_num=e4xv, train/loss=8.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 45/12318 [04:45<21:36:25,  6.34s/it, v_num=e4xv, train/loss=8.1\r",
-      "Epoch 0:   0%| | 45/12318 [04:45<21:36:26,  6.34s/it, v_num=e4xv, train/loss=0.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 46/12318 [04:50<21:30:04,  6.31s/it, v_num=e4xv, train/loss=0.0\r",
-      "Epoch 0:   0%| | 46/12318 [04:50<21:30:04,  6.31s/it, v_num=e4xv, train/loss=4.7"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 47/12318 [04:56<21:30:51,  6.31s/it, v_num=e4xv, train/loss=4.7\r",
-      "Epoch 0:   0%| | 47/12318 [04:56<21:30:51,  6.31s/it, v_num=e4xv, train/loss=8.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 48/12318 [05:00<21:18:19,  6.25s/it, v_num=e4xv, train/loss=8.1\r",
-      "Epoch 0:   0%| | 48/12318 [05:00<21:18:19,  6.25s/it, v_num=e4xv, train/loss=1.7"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 49/12318 [05:08<21:28:08,  6.30s/it, v_num=e4xv, train/loss=1.7\r",
-      "Epoch 0:   0%| | 49/12318 [05:08<21:28:08,  6.30s/it, v_num=e4xv, train/loss=8.3"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 50/12318 [05:11<21:13:56,  6.23s/it, v_num=e4xv, train/loss=8.3\r",
-      "Epoch 0:   0%| | 50/12318 [05:11<21:13:56,  6.23s/it, v_num=e4xv, train/loss=1.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 51/12318 [05:20<21:23:15,  6.28s/it, v_num=e4xv, train/loss=1.2\r",
-      "Epoch 0:   0%| | 51/12318 [05:20<21:23:15,  6.28s/it, v_num=e4xv, train/loss=8.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 52/12318 [05:26<21:24:12,  6.28s/it, v_num=e4xv, train/loss=8.2\r",
-      "Epoch 0:   0%| | 52/12318 [05:26<21:24:12,  6.28s/it, v_num=e4xv, train/loss=7.6"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 53/12318 [05:34<21:29:15,  6.31s/it, v_num=e4xv, train/loss=7.6\r",
-      "Epoch 0:   0%| | 53/12318 [05:34<21:29:15,  6.31s/it, v_num=e4xv, train/loss=8.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 54/12318 [05:37<21:18:11,  6.25s/it, v_num=e4xv, train/loss=8.1\r",
-      "Epoch 0:   0%| | 54/12318 [05:37<21:18:11,  6.25s/it, v_num=e4xv, train/loss=1.6"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 55/12318 [05:40<21:03:29,  6.18s/it, v_num=e4xv, train/loss=1.6\r",
-      "Epoch 0:   0%| | 55/12318 [05:40<21:03:30,  6.18s/it, v_num=e4xv, train/loss=1.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 56/12318 [05:42<20:49:10,  6.11s/it, v_num=e4xv, train/loss=1.1\r",
-      "Epoch 0:   0%| | 56/12318 [05:42<20:49:10,  6.11s/it, v_num=e4xv, train/loss=0.9"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 57/12318 [05:50<20:57:41,  6.15s/it, v_num=e4xv, train/loss=0.9\r",
-      "Epoch 0:   0%| | 57/12318 [05:50<20:57:41,  6.15s/it, v_num=e4xv, train/loss=8.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 58/12318 [05:56<20:55:08,  6.14s/it, v_num=e4xv, train/loss=8.2\r",
-      "Epoch 0:   0%| | 58/12318 [05:56<20:55:09,  6.14s/it, v_num=e4xv, train/loss=6.3"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 59/12318 [05:57<20:38:13,  6.06s/it, v_num=e4xv, train/loss=6.3\r",
-      "Epoch 0:   0%| | 59/12318 [05:57<20:38:13,  6.06s/it, v_num=e4xv, train/loss=0.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 60/12318 [06:05<20:43:03,  6.08s/it, v_num=e4xv, train/loss=0.0\r",
-      "Epoch 0:   0%| | 60/12318 [06:05<20:43:03,  6.08s/it, v_num=e4xv, train/loss=8.3"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   0%| | 61/12318 [06:09<20:39:03,  6.07s/it, v_num=e4xv, train/loss=8.3\r",
-      "Epoch 0:   0%| | 61/12318 [06:09<20:39:03,  6.07s/it, v_num=e4xv, train/loss=5.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 62/12318 [06:16<20:40:53,  6.07s/it, v_num=e4xv, train/loss=5.2\r",
-      "Epoch 0:   1%| | 62/12318 [06:16<20:40:53,  6.07s/it, v_num=e4xv, train/loss=8.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 63/12318 [06:20<20:33:49,  6.04s/it, v_num=e4xv, train/loss=8.0\r",
-      "Epoch 0:   1%| | 63/12318 [06:20<20:33:49,  6.04s/it, v_num=e4xv, train/loss=2.8"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 64/12318 [06:33<20:57:06,  6.16s/it, v_num=e4xv, train/loss=2.8"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 64/12318 [06:36<21:06:21,  6.20s/it, v_num=e4xv, train/loss=1.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 65/12318 [06:45<21:14:06,  6.24s/it, v_num=e4xv, train/loss=1.2\r",
-      "Epoch 0:   1%| | 65/12318 [06:45<21:14:07,  6.24s/it, v_num=e4xv, train/loss=7.6"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 66/12318 [06:49<21:07:01,  6.20s/it, v_num=e4xv, train/loss=7.6\r",
-      "Epoch 0:   1%| | 66/12318 [06:49<21:07:01,  6.20s/it, v_num=e4xv, train/loss=3.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 67/12318 [06:52<20:58:32,  6.16s/it, v_num=e4xv, train/loss=3.2\r",
-      "Epoch 0:   1%| | 67/12318 [06:52<20:58:33,  6.16s/it, v_num=e4xv, train/loss=3.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 68/12318 [07:00<21:03:08,  6.19s/it, v_num=e4xv, train/loss=3.1\r",
-      "Epoch 0:   1%| | 68/12318 [07:00<21:03:08,  6.19s/it, v_num=e4xv, train/loss=7.4"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 69/12318 [07:06<21:01:11,  6.18s/it, v_num=e4xv, train/loss=7.4\r",
-      "Epoch 0:   1%| | 69/12318 [07:06<21:01:12,  6.18s/it, v_num=e4xv, train/loss=6.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 70/12318 [07:13<21:05:24,  6.20s/it, v_num=e4xv, train/loss=6.2\r",
-      "Epoch 0:   1%| | 70/12318 [07:13<21:05:24,  6.20s/it, v_num=e4xv, train/loss=7.5"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 71/12318 [07:18<21:00:16,  6.17s/it, v_num=e4xv, train/loss=7.5\r",
-      "Epoch 0:   1%| | 71/12318 [07:18<21:00:16,  6.17s/it, v_num=e4xv, train/loss=4.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 72/12318 [07:23<20:56:49,  6.16s/it, v_num=e4xv, train/loss=4.2\r",
-      "Epoch 0:   1%| | 72/12318 [07:23<20:56:49,  6.16s/it, v_num=e4xv, train/loss=4.5"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 73/12318 [07:24<20:43:15,  6.09s/it, v_num=e4xv, train/loss=4.5\r",
-      "Epoch 0:   1%| | 73/12318 [07:24<20:43:15,  6.09s/it, v_num=e4xv, train/loss=0.2"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 74/12318 [07:33<20:50:26,  6.13s/it, v_num=e4xv, train/loss=0.2\r",
-      "Epoch 0:   1%| | 74/12318 [07:33<20:50:26,  6.13s/it, v_num=e4xv, train/loss=7.5"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 75/12318 [07:38<20:48:44,  6.12s/it, v_num=e4xv, train/loss=7.5\r",
-      "Epoch 0:   1%| | 75/12318 [07:38<20:48:44,  6.12s/it, v_num=e4xv, train/loss=5.8"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 76/12318 [07:43<20:44:05,  6.10s/it, v_num=e4xv, train/loss=5.8\r",
-      "Epoch 0:   1%| | 76/12318 [07:43<20:44:05,  6.10s/it, v_num=e4xv, train/loss=3.8"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 77/12318 [07:46<20:36:47,  6.06s/it, v_num=e4xv, train/loss=3.8\r",
-      "Epoch 0:   1%| | 77/12318 [07:46<20:36:47,  6.06s/it, v_num=e4xv, train/loss=2.5"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 78/12318 [07:51<20:32:28,  6.04s/it, v_num=e4xv, train/loss=2.5\r",
-      "Epoch 0:   1%| | 78/12318 [07:51<20:32:28,  6.04s/it, v_num=e4xv, train/loss=3.8"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 79/12318 [07:57<20:33:53,  6.05s/it, v_num=e4xv, train/loss=3.8\r",
-      "Epoch 0:   1%| | 79/12318 [07:57<20:33:53,  6.05s/it, v_num=e4xv, train/loss=7.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 80/12318 [08:01<20:27:05,  6.02s/it, v_num=e4xv, train/loss=7.1\r",
-      "Epoch 0:   1%| | 80/12318 [08:01<20:27:05,  6.02s/it, v_num=e4xv, train/loss=3.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 81/12318 [08:05<20:23:12,  6.00s/it, v_num=e4xv, train/loss=3.0\r",
-      "Epoch 0:   1%| | 81/12318 [08:05<20:23:12,  6.00s/it, v_num=e4xv, train/loss=4.1"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 82/12318 [08:09<20:18:01,  5.97s/it, v_num=e4xv, train/loss=4.1\r",
-      "Epoch 0:   1%| | 82/12318 [08:09<20:18:01,  5.97s/it, v_num=e4xv, train/loss=3.5"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 83/12318 [08:16<20:19:27,  5.98s/it, v_num=e4xv, train/loss=3.5\r",
-      "Epoch 0:   1%| | 83/12318 [08:16<20:19:27,  5.98s/it, v_num=e4xv, train/loss=6.6"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 84/12318 [08:19<20:11:48,  5.94s/it, v_num=e4xv, train/loss=6.6\r",
-      "Epoch 0:   1%| | 84/12318 [08:19<20:11:48,  5.94s/it, v_num=e4xv, train/loss=2.4"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 85/12318 [08:20<20:00:39,  5.89s/it, v_num=e4xv, train/loss=2.4\r",
-      "Epoch 0:   1%| | 85/12318 [08:20<20:00:39,  5.89s/it, v_num=e4xv, train/loss=0.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 86/12318 [08:27<20:02:17,  5.90s/it, v_num=e4xv, train/loss=0.0\r",
-      "Epoch 0:   1%| | 86/12318 [08:27<20:02:17,  5.90s/it, v_num=e4xv, train/loss=7.3"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 87/12318 [08:35<20:08:37,  5.93s/it, v_num=e4xv, train/loss=7.3\r",
-      "Epoch 0:   1%| | 87/12318 [08:35<20:08:37,  5.93s/it, v_num=e4xv, train/loss=7.6"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 88/12318 [08:41<20:07:40,  5.92s/it, v_num=e4xv, train/loss=7.6\r",
-      "Epoch 0:   1%| | 88/12318 [08:41<20:07:40,  5.92s/it, v_num=e4xv, train/loss=6.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 89/12318 [08:43<19:58:12,  5.88s/it, v_num=e4xv, train/loss=6.0\r",
-      "Epoch 0:   1%| | 89/12318 [08:43<19:58:12,  5.88s/it, v_num=e4xv, train/loss=0.6"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 90/12318 [08:46<19:52:36,  5.85s/it, v_num=e4xv, train/loss=0.6\r",
-      "Epoch 0:   1%| | 90/12318 [08:46<19:52:36,  5.85s/it, v_num=e4xv, train/loss=3.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 91/12318 [08:51<19:50:39,  5.84s/it, v_num=e4xv, train/loss=3.0\r",
-      "Epoch 0:   1%| | 91/12318 [08:51<19:50:39,  5.84s/it, v_num=e4xv, train/loss=4.7"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 92/12318 [09:00<19:56:38,  5.87s/it, v_num=e4xv, train/loss=4.7\r",
-      "Epoch 0:   1%| | 92/12318 [09:00<19:56:38,  5.87s/it, v_num=e4xv, train/loss=7.6"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 93/12318 [09:03<19:50:04,  5.84s/it, v_num=e4xv, train/loss=7.6\r",
-      "Epoch 0:   1%| | 93/12318 [09:03<19:50:04,  5.84s/it, v_num=e4xv, train/loss=2.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 94/12318 [09:10<19:53:59,  5.86s/it, v_num=e4xv, train/loss=2.0\r",
-      "Epoch 0:   1%| | 94/12318 [09:10<19:53:59,  5.86s/it, v_num=e4xv, train/loss=7.4"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 95/12318 [09:14<19:48:39,  5.83s/it, v_num=e4xv, train/loss=7.4\r",
-      "Epoch 0:   1%| | 95/12318 [09:14<19:48:39,  5.83s/it, v_num=e4xv, train/loss=3.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 96/12318 [09:42<20:36:55,  6.07s/it, v_num=e4xv, train/loss=3.0\r",
-      "Epoch 0:   1%| | 96/12318 [09:42<20:36:55,  6.07s/it, v_num=e4xv, train/loss=0.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 97/12318 [09:46<20:32:15,  6.05s/it, v_num=e4xv, train/loss=0.0\r",
-      "Epoch 0:   1%| | 97/12318 [09:46<20:32:15,  6.05s/it, v_num=e4xv, train/loss=2.0"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 98/12318 [09:53<20:33:03,  6.05s/it, v_num=e4xv, train/loss=2.0\r",
-      "Epoch 0:   1%| | 98/12318 [09:53<20:33:03,  6.05s/it, v_num=e4xv, train/loss=6.4"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 99/12318 [09:56<20:26:18,  6.02s/it, v_num=e4xv, train/loss=6.4\r",
-      "Epoch 0:   1%| | 99/12318 [09:56<20:26:18,  6.02s/it, v_num=e4xv, train/loss=0.8"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 100/12318 [09:59<20:20:44,  5.99s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   1%| | 100/12318 [09:59<20:20:44,  5.99s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 101/12318 [10:08<20:25:49,  6.02s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 101/12318 [10:08<20:25:49,  6.02s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 102/12318 [10:11<20:21:30,  6.00s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 102/12318 [10:11<20:21:30,  6.00s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 103/12318 [10:15<20:17:15,  5.98s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 103/12318 [10:15<20:17:15,  5.98s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 104/12318 [10:22<20:18:09,  5.98s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 104/12318 [10:22<20:18:09,  5.98s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 105/12318 [10:30<20:22:55,  6.01s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 105/12318 [10:30<20:22:55,  6.01s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 106/12318 [10:34<20:18:47,  5.99s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 106/12318 [10:34<20:18:47,  5.99s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 107/12318 [10:38<20:13:44,  5.96s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 107/12318 [10:38<20:13:44,  5.96s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 108/12318 [10:42<20:10:44,  5.95s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 108/12318 [10:42<20:10:44,  5.95s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 109/12318 [10:45<20:05:49,  5.93s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   1%| | 109/12318 [10:45<20:05:49,  5.93s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 110/12318 [10:53<20:09:14,  5.94s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 110/12318 [10:53<20:09:14,  5.94s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 111/12318 [11:01<20:12:19,  5.96s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 111/12318 [11:01<20:12:19,  5.96s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 112/12318 [11:10<20:17:11,  5.98s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 112/12318 [11:10<20:17:12,  5.98s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 113/12318 [11:13<20:12:28,  5.96s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 113/12318 [11:13<20:12:28,  5.96s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 114/12318 [11:20<20:13:34,  5.97s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 114/12318 [11:20<20:13:34,  5.97s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 115/12318 [11:27<20:16:23,  5.98s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 115/12318 [11:27<20:16:23,  5.98s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 116/12318 [11:31<20:11:50,  5.96s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 116/12318 [11:31<20:11:50,  5.96s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 117/12318 [11:35<20:09:09,  5.95s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 117/12318 [11:35<20:09:09,  5.95s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 118/12318 [11:41<20:08:21,  5.94s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   1%| | 118/12318 [11:41<20:08:21,  5.94s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 119/12318 [11:45<20:05:49,  5.93s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 119/12318 [11:45<20:05:49,  5.93s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 120/12318 [11:53<20:08:43,  5.95s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   1%| | 120/12318 [11:53<20:08:43,  5.95s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 121/12318 [11:59<20:07:56,  5.94s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 121/12318 [11:59<20:07:56,  5.94s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 122/12318 [12:04<20:07:11,  5.94s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   1%| | 122/12318 [12:04<20:07:11,  5.94s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 123/12318 [12:09<20:05:37,  5.93s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 123/12318 [12:09<20:05:37,  5.93s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 124/12318 [12:13<20:02:15,  5.92s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   1%| | 124/12318 [12:13<20:02:15,  5.92s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 125/12318 [12:17<19:58:59,  5.90s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 125/12318 [12:17<19:58:59,  5.90s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 126/12318 [12:26<20:03:29,  5.92s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 126/12318 [12:26<20:03:29,  5.92s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 127/12318 [12:31<20:02:41,  5.92s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 127/12318 [12:31<20:02:41,  5.92s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 128/12318 [12:50<20:22:18,  6.02s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   1%| | 128/12318 [12:50<20:22:19,  6.02s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 129/12318 [12:53<20:18:10,  6.00s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 129/12318 [12:53<20:18:10,  6.00s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 130/12318 [13:02<20:22:20,  6.02s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 130/12318 [13:02<20:22:20,  6.02s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 131/12318 [13:09<20:24:46,  6.03s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 131/12318 [13:09<20:24:46,  6.03s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 132/12318 [13:12<20:19:49,  6.01s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 132/12318 [13:12<20:19:49,  6.01s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 133/12318 [13:18<20:19:07,  6.00s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   1%| | 133/12318 [13:18<20:19:07,  6.00s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 134/12318 [13:21<20:15:12,  5.98s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   1%| | 134/12318 [13:21<20:15:12,  5.98s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 135/12318 [13:23<20:08:56,  5.95s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 135/12318 [13:23<20:08:56,  5.95s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 136/12318 [13:31<20:11:28,  5.97s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   1%| | 136/12318 [13:31<20:11:28,  5.97s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 137/12318 [13:38<20:13:00,  5.97s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 137/12318 [13:38<20:13:00,  5.97s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 138/12318 [13:41<20:08:23,  5.95s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 138/12318 [13:41<20:08:24,  5.95s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 139/12318 [13:46<20:06:51,  5.95s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 139/12318 [13:46<20:06:51,  5.95s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 140/12318 [13:50<20:03:53,  5.93s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   1%| | 140/12318 [13:50<20:03:53,  5.93s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 141/12318 [13:57<20:04:54,  5.94s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 141/12318 [13:57<20:04:54,  5.94s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 142/12318 [14:02<20:03:34,  5.93s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 142/12318 [14:02<20:03:34,  5.93s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 143/12318 [14:09<20:06:07,  5.94s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   1%| | 143/12318 [14:09<20:06:07,  5.94s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 144/12318 [14:13<20:03:16,  5.93s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 144/12318 [14:13<20:03:16,  5.93s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 145/12318 [14:20<20:04:21,  5.94s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 145/12318 [14:20<20:04:21,  5.94s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 146/12318 [14:25<20:02:14,  5.93s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 146/12318 [14:25<20:02:14,  5.93s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 147/12318 [14:28<19:58:40,  5.91s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   1%| | 147/12318 [14:28<19:58:40,  5.91s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 148/12318 [14:35<19:59:37,  5.91s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 148/12318 [14:35<19:59:37,  5.91s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 149/12318 [14:40<19:58:23,  5.91s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 149/12318 [14:40<19:58:24,  5.91s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 150/12318 [14:47<19:59:17,  5.91s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   1%| | 150/12318 [14:47<19:59:17,  5.91s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 151/12318 [14:52<19:58:38,  5.91s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 151/12318 [14:52<19:58:38,  5.91s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 152/12318 [14:59<19:59:22,  5.92s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   1%| | 152/12318 [14:59<19:59:22,  5.92s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 153/12318 [15:04<19:58:02,  5.91s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 153/12318 [15:04<19:58:02,  5.91s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 154/12318 [15:07<19:54:39,  5.89s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   1%| | 154/12318 [15:07<19:54:39,  5.89s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 155/12318 [15:14<19:55:47,  5.90s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 155/12318 [15:14<19:55:47,  5.90s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 156/12318 [15:22<19:58:15,  5.91s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 156/12318 [15:22<19:58:15,  5.91s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 157/12318 [15:27<19:57:41,  5.91s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 157/12318 [15:27<19:57:41,  5.91s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 158/12318 [15:36<20:01:16,  5.93s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   1%| | 158/12318 [15:36<20:01:16,  5.93s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 159/12318 [15:44<20:03:20,  5.94s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 159/12318 [15:44<20:03:20,  5.94s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 160/12318 [16:18<20:39:38,  6.12s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 160/12318 [16:18<20:39:38,  6.12s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 161/12318 [16:20<20:33:30,  6.09s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 161/12318 [16:20<20:33:30,  6.09s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 162/12318 [16:26<20:34:04,  6.09s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   1%| | 162/12318 [16:26<20:34:04,  6.09s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 163/12318 [16:34<20:35:55,  6.10s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 163/12318 [16:34<20:35:55,  6.10s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 164/12318 [16:38<20:33:44,  6.09s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 164/12318 [16:38<20:33:44,  6.09s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 165/12318 [16:41<20:29:41,  6.07s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 165/12318 [16:41<20:29:41,  6.07s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 166/12318 [16:48<20:30:15,  6.07s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 166/12318 [16:48<20:30:15,  6.07s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 167/12318 [16:53<20:29:34,  6.07s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 167/12318 [16:53<20:29:34,  6.07s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 168/12318 [16:58<20:28:14,  6.07s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   1%| | 168/12318 [16:58<20:28:14,  6.07s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 169/12318 [17:03<20:26:12,  6.06s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   1%| | 169/12318 [17:03<20:26:12,  6.06s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 170/12318 [17:12<20:29:15,  6.07s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 170/12318 [17:12<20:29:15,  6.07s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 171/12318 [17:15<20:26:02,  6.06s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 171/12318 [17:15<20:26:02,  6.06s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 172/12318 [17:17<20:21:34,  6.03s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 172/12318 [17:17<20:21:34,  6.03s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 173/12318 [17:25<20:23:30,  6.04s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   1%| | 173/12318 [17:25<20:23:30,  6.04s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 174/12318 [17:31<20:22:46,  6.04s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 174/12318 [17:31<20:22:47,  6.04s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 175/12318 [17:36<20:21:23,  6.04s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   1%| | 175/12318 [17:36<20:21:23,  6.04s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 176/12318 [17:40<20:18:52,  6.02s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   1%| | 176/12318 [17:40<20:18:52,  6.02s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 177/12318 [17:46<20:19:24,  6.03s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 177/12318 [17:46<20:19:24,  6.03s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 178/12318 [17:53<20:19:57,  6.03s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 178/12318 [17:53<20:19:57,  6.03s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 179/12318 [18:01<20:22:51,  6.04s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   1%| | 179/12318 [18:01<20:22:51,  6.04s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 180/12318 [18:09<20:24:31,  6.05s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   1%| | 180/12318 [18:09<20:24:31,  6.05s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 181/12318 [18:13<20:22:03,  6.04s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 181/12318 [18:13<20:22:03,  6.04s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 182/12318 [18:21<20:23:41,  6.05s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   1%| | 182/12318 [18:21<20:23:41,  6.05s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 183/12318 [18:28<20:25:18,  6.06s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 183/12318 [18:28<20:25:18,  6.06s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   1%| | 184/12318 [18:35<20:25:47,  6.06s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   1%| | 184/12318 [18:35<20:25:47,  6.06s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 185/12318 [18:38<20:22:49,  6.05s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 185/12318 [18:38<20:22:49,  6.05s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 186/12318 [18:47<20:25:41,  6.06s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 186/12318 [18:47<20:25:41,  6.06s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 187/12318 [18:55<20:27:20,  6.07s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   2%| | 187/12318 [18:55<20:27:20,  6.07s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 188/12318 [19:02<20:29:03,  6.08s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 188/12318 [19:02<20:29:03,  6.08s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 189/12318 [19:07<20:27:19,  6.07s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 189/12318 [19:07<20:27:19,  6.07s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 190/12318 [19:16<20:30:06,  6.09s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 190/12318 [19:16<20:30:06,  6.09s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 191/12318 [19:21<20:28:50,  6.08s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   2%| | 191/12318 [19:21<20:28:50,  6.08s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 192/12318 [19:42<20:44:42,  6.16s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   2%| | 192/12318 [19:42<20:44:42,  6.16s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 193/12318 [19:43<20:39:32,  6.13s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 193/12318 [19:43<20:39:32,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 194/12318 [19:49<20:38:43,  6.13s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 194/12318 [19:49<20:38:43,  6.13s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 195/12318 [19:54<20:38:00,  6.13s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   2%| | 195/12318 [19:54<20:38:00,  6.13s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 196/12318 [19:58<20:35:12,  6.11s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   2%| | 196/12318 [19:58<20:35:12,  6.11s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 197/12318 [20:02<20:32:55,  6.10s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 197/12318 [20:02<20:32:55,  6.10s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 198/12318 [20:07<20:31:40,  6.10s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 198/12318 [20:07<20:31:40,  6.10s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 199/12318 [20:12<20:30:27,  6.09s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   2%| | 199/12318 [20:12<20:30:27,  6.09s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 200/12318 [20:20<20:32:02,  6.10s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   2%| | 200/12318 [20:20<20:32:02,  6.10s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 201/12318 [20:25<20:31:24,  6.10s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 201/12318 [20:25<20:31:24,  6.10s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 202/12318 [20:29<20:29:13,  6.09s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   2%| | 202/12318 [20:29<20:29:13,  6.09s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 203/12318 [20:31<20:24:52,  6.07s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 203/12318 [20:31<20:24:52,  6.07s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 204/12318 [20:33<20:20:35,  6.05s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 204/12318 [20:33<20:20:35,  6.05s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 205/12318 [20:37<20:18:23,  6.04s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 205/12318 [20:37<20:18:23,  6.04s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 206/12318 [20:39<20:14:08,  6.01s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 206/12318 [20:39<20:14:08,  6.01s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 207/12318 [20:47<20:16:39,  6.03s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 207/12318 [20:47<20:16:39,  6.03s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 208/12318 [20:49<20:11:59,  6.00s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   2%| | 208/12318 [20:49<20:11:59,  6.00s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 209/12318 [20:51<20:08:53,  5.99s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 209/12318 [20:51<20:08:53,  5.99s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 210/12318 [20:57<20:08:21,  5.99s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 210/12318 [20:57<20:08:21,  5.99s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 211/12318 [21:04<20:09:44,  6.00s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   2%| | 211/12318 [21:04<20:09:44,  6.00s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 212/12318 [21:09<20:08:39,  5.99s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 212/12318 [21:09<20:08:39,  5.99s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 213/12318 [21:17<20:10:12,  6.00s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   2%| | 213/12318 [21:17<20:10:12,  6.00s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 214/12318 [21:19<20:06:11,  5.98s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 214/12318 [21:19<20:06:11,  5.98s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 215/12318 [21:26<20:06:41,  5.98s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 215/12318 [21:26<20:06:41,  5.98s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 216/12318 [21:32<20:07:10,  5.98s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 216/12318 [21:32<20:07:10,  5.98s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 217/12318 [21:41<20:09:33,  6.00s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 217/12318 [21:41<20:09:33,  6.00s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 218/12318 [21:47<20:09:58,  6.00s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 218/12318 [21:47<20:09:58,  6.00s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 219/12318 [21:53<20:09:27,  6.00s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   2%| | 219/12318 [21:53<20:09:27,  6.00s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 220/12318 [22:01<20:10:52,  6.01s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   2%| | 220/12318 [22:01<20:10:52,  6.01s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 221/12318 [22:08<20:12:13,  6.01s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 221/12318 [22:08<20:12:13,  6.01s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 222/12318 [22:11<20:09:19,  6.00s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 222/12318 [22:11<20:09:19,  6.00s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 223/12318 [22:14<20:05:54,  5.98s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 223/12318 [22:14<20:05:54,  5.98s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 224/12318 [23:00<20:42:12,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 224/12318 [23:00<20:42:12,  6.16s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 225/12318 [23:03<20:39:40,  6.15s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   2%| | 225/12318 [23:03<20:39:40,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 226/12318 [23:10<20:39:58,  6.15s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 226/12318 [23:10<20:39:58,  6.15s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 227/12318 [23:17<20:40:16,  6.15s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   2%| | 227/12318 [23:17<20:40:16,  6.15s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 228/12318 [23:18<20:36:20,  6.14s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 228/12318 [23:18<20:36:20,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 229/12318 [23:26<20:37:32,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 229/12318 [23:26<20:37:32,  6.14s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 230/12318 [23:32<20:36:54,  6.14s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 230/12318 [23:32<20:36:54,  6.14s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 231/12318 [23:40<20:39:04,  6.15s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   2%| | 231/12318 [23:40<20:39:04,  6.15s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 232/12318 [23:48<20:40:17,  6.16s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   2%| | 232/12318 [23:48<20:40:17,  6.16s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 233/12318 [23:55<20:40:33,  6.16s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 233/12318 [23:55<20:40:33,  6.16s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 234/12318 [23:58<20:38:04,  6.15s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 234/12318 [23:58<20:38:04,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 235/12318 [24:07<20:40:05,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 235/12318 [24:07<20:40:05,  6.16s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 236/12318 [24:08<20:36:18,  6.14s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 236/12318 [24:08<20:36:18,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 237/12318 [24:16<20:37:30,  6.15s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 237/12318 [24:16<20:37:30,  6.15s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 238/12318 [24:20<20:35:35,  6.14s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 238/12318 [24:20<20:35:35,  6.14s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 239/12318 [24:29<20:37:44,  6.15s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 239/12318 [24:29<20:37:44,  6.15s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 240/12318 [24:36<20:37:59,  6.15s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   2%| | 240/12318 [24:36<20:37:59,  6.15s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 241/12318 [24:37<20:33:52,  6.13s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 241/12318 [24:37<20:33:52,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 242/12318 [24:41<20:32:24,  6.12s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 242/12318 [24:41<20:32:24,  6.12s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 243/12318 [24:50<20:34:31,  6.13s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 243/12318 [24:50<20:34:31,  6.13s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 244/12318 [24:54<20:32:13,  6.12s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   2%| | 244/12318 [24:54<20:32:13,  6.12s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 245/12318 [24:55<20:28:10,  6.10s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 245/12318 [24:55<20:28:10,  6.10s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 246/12318 [25:02<20:28:30,  6.11s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 246/12318 [25:02<20:28:30,  6.11s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 247/12318 [25:10<20:30:28,  6.12s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 247/12318 [25:10<20:30:28,  6.12s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 248/12318 [25:13<20:27:45,  6.10s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 248/12318 [25:13<20:27:45,  6.10s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 249/12318 [25:18<20:26:21,  6.10s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 249/12318 [25:18<20:26:21,  6.10s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 250/12318 [25:20<20:23:15,  6.08s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 250/12318 [25:20<20:23:15,  6.08s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 251/12318 [25:29<20:25:18,  6.09s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 251/12318 [25:29<20:25:18,  6.09s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 252/12318 [25:32<20:23:07,  6.08s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 252/12318 [25:32<20:23:07,  6.08s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 253/12318 [25:41<20:25:12,  6.09s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 253/12318 [25:41<20:25:13,  6.09s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 254/12318 [25:50<20:27:10,  6.10s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 254/12318 [25:50<20:27:10,  6.10s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 255/12318 [25:52<20:24:08,  6.09s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 255/12318 [25:52<20:24:08,  6.09s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 256/12318 [26:23<20:43:14,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 256/12318 [26:23<20:43:14,  6.18s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 257/12318 [26:28<20:42:14,  6.18s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   2%| | 257/12318 [26:28<20:42:14,  6.18s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 258/12318 [26:34<20:42:29,  6.18s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   2%| | 258/12318 [26:34<20:42:29,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 259/12318 [26:42<20:43:34,  6.19s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 259/12318 [26:42<20:43:34,  6.19s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 260/12318 [26:50<20:44:36,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 260/12318 [26:50<20:44:36,  6.19s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 261/12318 [26:57<20:45:42,  6.20s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 261/12318 [26:57<20:45:42,  6.20s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 262/12318 [26:59<20:41:51,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 262/12318 [26:59<20:41:51,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 263/12318 [27:08<20:43:45,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 263/12318 [27:08<20:43:45,  6.19s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 264/12318 [27:13<20:43:08,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 264/12318 [27:13<20:43:08,  6.19s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 265/12318 [27:17<20:40:57,  6.18s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   2%| | 265/12318 [27:17<20:40:57,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 266/12318 [27:21<20:39:10,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 266/12318 [27:21<20:39:10,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 267/12318 [27:28<20:40:16,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 267/12318 [27:28<20:40:16,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 268/12318 [27:36<20:41:16,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 268/12318 [27:36<20:41:16,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 269/12318 [27:39<20:38:43,  6.17s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 269/12318 [27:39<20:38:43,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 270/12318 [27:47<20:40:27,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 270/12318 [27:47<20:40:27,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 271/12318 [27:51<20:38:43,  6.17s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 271/12318 [27:51<20:38:43,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 272/12318 [27:56<20:37:47,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 272/12318 [27:56<20:37:47,  6.17s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 273/12318 [27:58<20:34:08,  6.15s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   2%| | 273/12318 [27:58<20:34:08,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 274/12318 [28:07<20:36:00,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 274/12318 [28:07<20:36:00,  6.16s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 275/12318 [28:11<20:34:40,  6.15s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 275/12318 [28:11<20:34:40,  6.15s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 276/12318 [28:14<20:32:12,  6.14s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 276/12318 [28:14<20:32:12,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 277/12318 [28:20<20:31:39,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 277/12318 [28:20<20:31:39,  6.14s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 278/12318 [28:23<20:29:35,  6.13s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   2%| | 278/12318 [28:23<20:29:35,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 279/12318 [28:26<20:27:32,  6.12s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 279/12318 [28:26<20:27:32,  6.12s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 280/12318 [28:31<20:26:17,  6.11s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 280/12318 [28:31<20:26:17,  6.11s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 281/12318 [28:39<20:27:20,  6.12s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 281/12318 [28:39<20:27:20,  6.12s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 282/12318 [28:41<20:24:35,  6.10s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 282/12318 [28:41<20:24:35,  6.10s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 283/12318 [28:43<20:21:49,  6.09s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 283/12318 [28:43<20:21:49,  6.09s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 284/12318 [28:47<20:20:15,  6.08s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 284/12318 [28:47<20:20:15,  6.08s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 285/12318 [28:53<20:19:45,  6.08s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 285/12318 [28:53<20:19:45,  6.08s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 286/12318 [28:57<20:18:36,  6.08s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   2%| | 286/12318 [28:57<20:18:36,  6.08s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 287/12318 [29:01<20:16:39,  6.07s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 287/12318 [29:01<20:16:39,  6.07s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 288/12318 [29:29<20:31:43,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 288/12318 [29:29<20:31:43,  6.14s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 289/12318 [29:32<20:29:45,  6.13s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 289/12318 [29:32<20:29:45,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 290/12318 [29:38<20:29:16,  6.13s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 290/12318 [29:38<20:29:16,  6.13s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 291/12318 [29:45<20:30:12,  6.14s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   2%| | 291/12318 [29:45<20:30:12,  6.14s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 292/12318 [29:53<20:31:13,  6.14s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 292/12318 [29:53<20:31:13,  6.14s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 293/12318 [29:58<20:30:00,  6.14s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 293/12318 [29:58<20:30:00,  6.14s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 294/12318 [30:01<20:28:05,  6.13s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 294/12318 [30:01<20:28:05,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 295/12318 [30:06<20:27:16,  6.12s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 295/12318 [30:06<20:27:16,  6.12s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 296/12318 [30:09<20:24:39,  6.11s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   2%| | 296/12318 [30:09<20:24:39,  6.11s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 297/12318 [30:15<20:24:53,  6.11s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 297/12318 [30:15<20:24:53,  6.11s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 298/12318 [30:22<20:25:10,  6.12s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   2%| | 298/12318 [30:22<20:25:10,  6.12s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 299/12318 [30:24<20:22:33,  6.10s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 299/12318 [30:24<20:22:33,  6.10s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 300/12318 [30:32<20:23:28,  6.11s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   2%| | 300/12318 [30:32<20:23:28,  6.11s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 301/12318 [30:41<20:25:10,  6.12s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 301/12318 [30:41<20:25:10,  6.12s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 302/12318 [30:45<20:23:58,  6.11s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 302/12318 [30:45<20:23:58,  6.11s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 303/12318 [30:53<20:24:54,  6.12s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 303/12318 [30:53<20:24:54,  6.12s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 304/12318 [31:01<20:25:48,  6.12s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 304/12318 [31:01<20:25:48,  6.12s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 305/12318 [31:05<20:24:38,  6.12s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   2%| | 305/12318 [31:05<20:24:38,  6.12s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 306/12318 [31:13<20:25:36,  6.12s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   2%| | 306/12318 [31:13<20:25:36,  6.12s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   2%| | 307/12318 [31:14<20:22:22,  6.11s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   2%| | 307/12318 [31:14<20:22:22,  6.11s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 308/12318 [31:22<20:23:19,  6.11s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 308/12318 [31:22<20:23:19,  6.11s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 309/12318 [31:26<20:21:49,  6.10s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 309/12318 [31:26<20:21:49,  6.10s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 310/12318 [31:29<20:19:39,  6.09s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 310/12318 [31:29<20:19:39,  6.09s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 311/12318 [31:33<20:18:32,  6.09s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 311/12318 [31:33<20:18:32,  6.09s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 312/12318 [31:42<20:20:07,  6.10s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 312/12318 [31:42<20:20:07,  6.10s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 313/12318 [31:49<20:20:25,  6.10s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 313/12318 [31:49<20:20:25,  6.10s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 314/12318 [31:54<20:20:00,  6.10s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 314/12318 [31:54<20:20:00,  6.10s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 315/12318 [31:59<20:18:55,  6.09s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   3%| | 315/12318 [31:59<20:18:55,  6.09s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 316/12318 [32:06<20:19:13,  6.10s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 316/12318 [32:06<20:19:13,  6.10s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 317/12318 [32:09<20:17:29,  6.09s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 317/12318 [32:09<20:17:29,  6.09s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 318/12318 [32:14<20:16:46,  6.08s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 318/12318 [32:14<20:16:46,  6.08s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 319/12318 [32:16<20:14:01,  6.07s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 319/12318 [32:16<20:14:01,  6.07s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 320/12318 [32:48<20:30:13,  6.15s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 320/12318 [32:48<20:30:13,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 321/12318 [32:56<20:31:03,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 321/12318 [32:56<20:31:03,  6.16s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 322/12318 [33:01<20:30:13,  6.15s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 322/12318 [33:01<20:30:13,  6.15s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 323/12318 [33:09<20:31:40,  6.16s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 323/12318 [33:09<20:31:40,  6.16s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 324/12318 [33:12<20:29:32,  6.15s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 324/12318 [33:12<20:29:32,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 325/12318 [33:20<20:30:23,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 325/12318 [33:20<20:30:23,  6.16s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 326/12318 [33:22<20:27:57,  6.14s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 326/12318 [33:22<20:27:57,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 327/12318 [33:29<20:28:09,  6.15s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 327/12318 [33:29<20:28:09,  6.15s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 328/12318 [33:37<20:28:59,  6.15s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 328/12318 [33:37<20:28:59,  6.15s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 329/12318 [33:39<20:26:37,  6.14s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 329/12318 [33:39<20:26:37,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 330/12318 [33:46<20:26:51,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 330/12318 [33:46<20:26:51,  6.14s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 331/12318 [33:49<20:24:51,  6.13s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 331/12318 [33:49<20:24:51,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 332/12318 [33:57<20:25:43,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 332/12318 [33:57<20:25:43,  6.14s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 333/12318 [34:02<20:24:58,  6.13s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 333/12318 [34:02<20:24:58,  6.13s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 334/12318 [34:06<20:23:54,  6.13s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 334/12318 [34:06<20:23:54,  6.13s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 335/12318 [34:09<20:21:33,  6.12s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 335/12318 [34:09<20:21:33,  6.12s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 336/12318 [34:16<20:22:22,  6.12s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 336/12318 [34:16<20:22:22,  6.12s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 337/12318 [34:24<20:23:15,  6.13s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 337/12318 [34:24<20:23:15,  6.13s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 338/12318 [34:33<20:24:43,  6.13s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 338/12318 [34:33<20:24:43,  6.13s/it, v_num=e4xv, train/loss=7."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 339/12318 [34:34<20:21:47,  6.12s/it, v_num=e4xv, train/loss=7.\r",
-      "Epoch 0:   3%| | 339/12318 [34:34<20:21:47,  6.12s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 340/12318 [34:43<20:23:13,  6.13s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 340/12318 [34:43<20:23:13,  6.13s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 341/12318 [34:46<20:21:31,  6.12s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 341/12318 [34:46<20:21:31,  6.12s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 342/12318 [34:55<20:22:54,  6.13s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 342/12318 [34:55<20:22:54,  6.13s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 343/12318 [35:00<20:22:27,  6.13s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 343/12318 [35:00<20:22:27,  6.13s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 344/12318 [35:09<20:23:48,  6.13s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   3%| | 344/12318 [35:09<20:23:48,  6.13s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 345/12318 [35:12<20:21:48,  6.12s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 345/12318 [35:12<20:21:48,  6.12s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 346/12318 [35:14<20:19:13,  6.11s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 346/12318 [35:14<20:19:13,  6.11s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 347/12318 [35:22<20:20:34,  6.12s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 347/12318 [35:22<20:20:34,  6.12s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 348/12318 [35:26<20:19:13,  6.11s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 348/12318 [35:26<20:19:13,  6.11s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 349/12318 [35:33<20:19:20,  6.11s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 349/12318 [35:33<20:19:20,  6.11s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 350/12318 [35:40<20:20:07,  6.12s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 350/12318 [35:40<20:20:07,  6.12s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 351/12318 [35:45<20:19:21,  6.11s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 351/12318 [35:45<20:19:21,  6.11s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 352/12318 [35:59<20:23:44,  6.14s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 352/12318 [35:59<20:23:44,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 353/12318 [36:07<20:24:31,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 353/12318 [36:07<20:24:31,  6.14s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 354/12318 [36:14<20:24:43,  6.14s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 354/12318 [36:14<20:24:43,  6.14s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 355/12318 [36:22<20:25:33,  6.15s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 355/12318 [36:22<20:25:33,  6.15s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 356/12318 [36:28<20:25:45,  6.15s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 356/12318 [36:28<20:25:45,  6.15s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 357/12318 [36:33<20:24:44,  6.14s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 357/12318 [36:33<20:24:44,  6.14s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 358/12318 [36:36<20:22:50,  6.13s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 358/12318 [36:36<20:22:50,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 359/12318 [36:38<20:20:38,  6.12s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 359/12318 [36:38<20:20:38,  6.12s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 360/12318 [36:43<20:19:57,  6.12s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 360/12318 [36:43<20:19:57,  6.12s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 361/12318 [36:51<20:20:44,  6.13s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 361/12318 [36:51<20:20:44,  6.13s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 362/12318 [36:56<20:20:21,  6.12s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 362/12318 [36:56<20:20:21,  6.12s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 363/12318 [37:00<20:19:04,  6.12s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   3%| | 363/12318 [37:00<20:19:04,  6.12s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 364/12318 [37:04<20:17:49,  6.11s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 364/12318 [37:04<20:17:49,  6.11s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 365/12318 [37:12<20:18:35,  6.12s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 365/12318 [37:12<20:18:35,  6.12s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 366/12318 [37:14<20:15:53,  6.10s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 366/12318 [37:14<20:15:53,  6.10s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 367/12318 [37:19<20:15:29,  6.10s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 367/12318 [37:19<20:15:29,  6.10s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 368/12318 [37:27<20:16:09,  6.11s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 368/12318 [37:27<20:16:09,  6.11s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 369/12318 [37:32<20:15:54,  6.11s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 369/12318 [37:32<20:15:54,  6.11s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 370/12318 [37:37<20:15:14,  6.10s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   3%| | 370/12318 [37:37<20:15:14,  6.10s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 371/12318 [37:44<20:15:25,  6.10s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 371/12318 [37:44<20:15:25,  6.10s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 372/12318 [37:53<20:16:44,  6.11s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 372/12318 [37:53<20:16:44,  6.11s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 373/12318 [38:01<20:17:27,  6.12s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 373/12318 [38:01<20:17:27,  6.12s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 374/12318 [38:06<20:17:03,  6.11s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 374/12318 [38:06<20:17:03,  6.11s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 375/12318 [38:14<20:17:48,  6.12s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 375/12318 [38:14<20:17:48,  6.12s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 376/12318 [38:21<20:18:31,  6.12s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 376/12318 [38:21<20:18:31,  6.12s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 377/12318 [38:29<20:19:14,  6.13s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 377/12318 [38:29<20:19:14,  6.13s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 378/12318 [38:38<20:20:30,  6.13s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 378/12318 [38:38<20:20:30,  6.13s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 379/12318 [38:42<20:19:16,  6.13s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 379/12318 [38:42<20:19:16,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 380/12318 [38:47<20:18:51,  6.13s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 380/12318 [38:47<20:18:51,  6.13s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 381/12318 [38:54<20:19:00,  6.13s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 381/12318 [38:54<20:19:00,  6.13s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 382/12318 [39:03<20:20:15,  6.13s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 382/12318 [39:03<20:20:15,  6.13s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 383/12318 [39:11<20:21:26,  6.14s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 383/12318 [39:11<20:21:26,  6.14s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 384/12318 [39:28<20:26:52,  6.17s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 384/12318 [39:28<20:26:52,  6.17s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 385/12318 [39:37<20:28:00,  6.17s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 385/12318 [39:37<20:28:00,  6.17s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 386/12318 [39:44<20:28:36,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 386/12318 [39:44<20:28:36,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 387/12318 [39:53<20:29:46,  6.18s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 387/12318 [39:53<20:29:46,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 388/12318 [39:59<20:29:23,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 388/12318 [39:59<20:29:23,  6.18s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 389/12318 [40:05<20:29:30,  6.18s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 389/12318 [40:05<20:29:30,  6.18s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 390/12318 [40:11<20:29:05,  6.18s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 390/12318 [40:11<20:29:05,  6.18s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 391/12318 [40:13<20:27:03,  6.17s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 391/12318 [40:13<20:27:03,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 392/12318 [40:22<20:28:12,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 392/12318 [40:22<20:28:12,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 393/12318 [40:28<20:28:23,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 393/12318 [40:28<20:28:23,  6.18s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 394/12318 [40:30<20:26:06,  6.17s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 394/12318 [40:30<20:26:06,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 395/12318 [40:38<20:26:41,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 395/12318 [40:38<20:26:42,  6.17s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 396/12318 [40:44<20:26:47,  6.17s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 396/12318 [40:44<20:26:47,  6.17s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 397/12318 [40:48<20:25:31,  6.17s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 397/12318 [40:48<20:25:31,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 398/12318 [40:50<20:23:01,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 398/12318 [40:50<20:23:01,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 399/12318 [40:58<20:24:15,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 399/12318 [40:58<20:24:15,  6.16s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 400/12318 [41:07<20:25:21,  6.17s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 400/12318 [41:07<20:25:21,  6.17s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 401/12318 [41:16<20:26:29,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 401/12318 [41:16<20:26:29,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 402/12318 [41:23<20:27:04,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 402/12318 [41:23<20:27:04,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 403/12318 [41:29<20:26:39,  6.18s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 403/12318 [41:29<20:26:39,  6.18s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 404/12318 [41:36<20:27:16,  6.18s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 404/12318 [41:36<20:27:16,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 405/12318 [41:39<20:25:32,  6.17s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 405/12318 [41:39<20:25:32,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 406/12318 [41:44<20:24:35,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 406/12318 [41:44<20:24:35,  6.17s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 407/12318 [41:49<20:23:52,  6.17s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 407/12318 [41:49<20:23:52,  6.17s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 408/12318 [41:52<20:22:25,  6.16s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 408/12318 [41:52<20:22:25,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 409/12318 [41:59<20:22:30,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 409/12318 [41:59<20:22:31,  6.16s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 410/12318 [42:00<20:20:18,  6.15s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 410/12318 [42:00<20:20:18,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 411/12318 [42:03<20:18:37,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 411/12318 [42:03<20:18:37,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 412/12318 [42:12<20:19:43,  6.15s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 412/12318 [42:12<20:19:43,  6.15s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 413/12318 [42:19<20:19:50,  6.15s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 413/12318 [42:19<20:19:50,  6.15s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 414/12318 [42:22<20:18:26,  6.14s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 414/12318 [42:22<20:18:26,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 415/12318 [42:31<20:19:31,  6.15s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 415/12318 [42:31<20:19:31,  6.15s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 416/12318 [42:48<20:24:44,  6.17s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 416/12318 [42:48<20:24:44,  6.17s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 417/12318 [42:51<20:23:05,  6.17s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 417/12318 [42:51<20:23:05,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 418/12318 [43:00<20:24:13,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 418/12318 [43:00<20:24:13,  6.17s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 419/12318 [43:06<20:24:18,  6.17s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 419/12318 [43:06<20:24:18,  6.17s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 420/12318 [43:12<20:23:53,  6.17s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   3%| | 420/12318 [43:12<20:23:53,  6.17s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 421/12318 [43:18<20:23:58,  6.17s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 421/12318 [43:18<20:23:58,  6.17s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 422/12318 [43:20<20:21:49,  6.16s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   3%| | 422/12318 [43:20<20:21:49,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 423/12318 [43:29<20:22:51,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   3%| | 423/12318 [43:29<20:22:51,  6.17s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 424/12318 [43:37<20:23:55,  6.17s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 424/12318 [43:37<20:23:55,  6.17s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 425/12318 [43:43<20:23:31,  6.17s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 425/12318 [43:43<20:23:31,  6.17s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 426/12318 [43:51<20:24:05,  6.18s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   3%| | 426/12318 [43:51<20:24:05,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 427/12318 [43:59<20:25:09,  6.18s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   3%| | 427/12318 [43:59<20:25:09,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 428/12318 [44:08<20:26:09,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 428/12318 [44:08<20:26:09,  6.19s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 429/12318 [44:16<20:27:08,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 429/12318 [44:16<20:27:08,  6.19s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 430/12318 [44:21<20:26:28,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   3%| | 430/12318 [44:21<20:26:28,  6.19s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   3%| | 431/12318 [44:28<20:26:32,  6.19s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   3%| | 431/12318 [44:28<20:26:32,  6.19s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 432/12318 [44:33<20:25:51,  6.19s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 432/12318 [44:33<20:25:51,  6.19s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 433/12318 [44:41<20:26:52,  6.19s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 433/12318 [44:41<20:26:52,  6.19s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 434/12318 [44:47<20:26:25,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 434/12318 [44:47<20:26:25,  6.19s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 435/12318 [44:54<20:26:56,  6.20s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   4%| | 435/12318 [44:54<20:26:56,  6.20s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 436/12318 [45:00<20:26:32,  6.19s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 436/12318 [45:00<20:26:32,  6.19s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 437/12318 [45:04<20:25:26,  6.19s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 437/12318 [45:04<20:25:26,  6.19s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 438/12318 [45:10<20:25:29,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 438/12318 [45:10<20:25:29,  6.19s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 439/12318 [45:19<20:26:33,  6.20s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 439/12318 [45:19<20:26:33,  6.20s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 440/12318 [45:24<20:25:54,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 440/12318 [45:24<20:25:54,  6.19s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 441/12318 [45:27<20:24:19,  6.19s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 441/12318 [45:27<20:24:19,  6.19s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 442/12318 [45:28<20:22:02,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 442/12318 [45:28<20:22:02,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 443/12318 [45:31<20:20:28,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 443/12318 [45:31<20:20:28,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 444/12318 [45:39<20:21:00,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 444/12318 [45:39<20:21:00,  6.17s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 445/12318 [45:43<20:19:54,  6.16s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 445/12318 [45:43<20:19:54,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 446/12318 [45:50<20:20:27,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 446/12318 [45:50<20:20:27,  6.17s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 447/12318 [45:59<20:21:14,  6.17s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 447/12318 [45:59<20:21:14,  6.17s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 448/12318 [46:14<20:25:07,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 448/12318 [46:14<20:25:07,  6.19s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 449/12318 [46:17<20:23:48,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 449/12318 [46:17<20:23:48,  6.19s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 450/12318 [46:22<20:22:56,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 450/12318 [46:22<20:22:56,  6.18s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 451/12318 [46:29<20:23:26,  6.19s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 451/12318 [46:29<20:23:27,  6.19s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 452/12318 [46:37<20:23:58,  6.19s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 452/12318 [46:37<20:23:58,  6.19s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 453/12318 [46:41<20:23:05,  6.19s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 453/12318 [46:41<20:23:05,  6.19s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 454/12318 [46:49<20:23:37,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 454/12318 [46:49<20:23:37,  6.19s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 455/12318 [46:56<20:23:42,  6.19s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 455/12318 [46:56<20:23:42,  6.19s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 456/12318 [46:59<20:22:23,  6.18s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 456/12318 [46:59<20:22:23,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 457/12318 [47:04<20:21:47,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 457/12318 [47:04<20:21:47,  6.18s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 458/12318 [47:12<20:22:18,  6.18s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 458/12318 [47:12<20:22:18,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 459/12318 [47:17<20:21:41,  6.18s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 459/12318 [47:17<20:21:41,  6.18s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 460/12318 [47:23<20:21:47,  6.18s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 460/12318 [47:23<20:21:47,  6.18s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 461/12318 [47:31<20:22:20,  6.19s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 461/12318 [47:31<20:22:20,  6.19s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 462/12318 [47:39<20:22:55,  6.19s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 462/12318 [47:39<20:22:55,  6.19s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 463/12318 [47:44<20:22:18,  6.19s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 463/12318 [47:44<20:22:18,  6.19s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 464/12318 [47:48<20:21:28,  6.18s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 464/12318 [47:48<20:21:28,  6.18s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 465/12318 [47:57<20:22:29,  6.19s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 465/12318 [47:57<20:22:29,  6.19s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 466/12318 [48:00<20:21:00,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 466/12318 [48:00<20:21:01,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 467/12318 [48:08<20:21:32,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 467/12318 [48:08<20:21:32,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 468/12318 [48:10<20:19:49,  6.18s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 468/12318 [48:10<20:19:49,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 469/12318 [48:13<20:18:34,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 469/12318 [48:13<20:18:34,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 470/12318 [48:20<20:18:39,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 470/12318 [48:20<20:18:39,  6.17s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 471/12318 [48:26<20:18:16,  6.17s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 471/12318 [48:26<20:18:16,  6.17s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 472/12318 [48:28<20:16:47,  6.16s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   4%| | 472/12318 [48:28<20:16:47,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 473/12318 [48:32<20:15:31,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 473/12318 [48:32<20:15:31,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 474/12318 [48:38<20:15:36,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 474/12318 [48:38<20:15:36,  6.16s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 475/12318 [48:47<20:16:36,  6.16s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 475/12318 [48:47<20:16:36,  6.16s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 476/12318 [48:49<20:14:43,  6.15s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 476/12318 [48:49<20:14:43,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 477/12318 [48:54<20:14:10,  6.15s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 477/12318 [48:54<20:14:11,  6.15s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 478/12318 [49:00<20:13:49,  6.15s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 478/12318 [49:00<20:13:49,  6.15s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 479/12318 [49:03<20:12:23,  6.14s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 479/12318 [49:03<20:12:23,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 480/12318 [49:34<20:22:30,  6.20s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 480/12318 [49:34<20:22:30,  6.20s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 481/12318 [49:38<20:21:29,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 481/12318 [49:38<20:21:29,  6.19s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 482/12318 [49:45<20:21:58,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 482/12318 [49:45<20:21:58,  6.19s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 483/12318 [49:47<20:19:53,  6.18s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 483/12318 [49:47<20:19:53,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 484/12318 [49:53<20:19:56,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 484/12318 [49:53<20:19:56,  6.19s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 485/12318 [49:56<20:18:31,  6.18s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 485/12318 [49:56<20:18:31,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 486/12318 [50:04<20:19:01,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 486/12318 [50:04<20:19:01,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 487/12318 [50:10<20:19:08,  6.18s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 487/12318 [50:10<20:19:08,  6.18s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 488/12318 [50:16<20:18:47,  6.18s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 488/12318 [50:16<20:18:47,  6.18s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 489/12318 [50:17<20:16:43,  6.17s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   4%| | 489/12318 [50:17<20:16:43,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 490/12318 [50:26<20:17:39,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 490/12318 [50:26<20:17:39,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 491/12318 [50:29<20:16:01,  6.17s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 491/12318 [50:29<20:16:01,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 492/12318 [50:36<20:16:32,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 492/12318 [50:36<20:16:32,  6.17s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 493/12318 [50:43<20:16:36,  6.17s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 493/12318 [50:43<20:16:36,  6.17s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 494/12318 [50:44<20:14:34,  6.16s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 494/12318 [50:44<20:14:34,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 495/12318 [50:53<20:15:28,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 495/12318 [50:53<20:15:28,  6.17s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 496/12318 [50:56<20:14:04,  6.16s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 496/12318 [50:56<20:14:04,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 497/12318 [51:03<20:14:36,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 497/12318 [51:03<20:14:36,  6.16s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 498/12318 [51:09<20:14:05,  6.16s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 498/12318 [51:09<20:14:05,  6.16s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 499/12318 [51:16<20:14:34,  6.17s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 499/12318 [51:16<20:14:35,  6.17s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 500/12318 [51:19<20:13:12,  6.16s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 500/12318 [51:19<20:13:12,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 501/12318 [51:23<20:12:03,  6.15s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 501/12318 [51:23<20:12:03,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 502/12318 [51:28<20:11:43,  6.15s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 502/12318 [51:28<20:11:43,  6.15s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 503/12318 [51:34<20:11:26,  6.15s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   4%| | 503/12318 [51:34<20:11:26,  6.15s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 504/12318 [51:43<20:12:19,  6.16s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 504/12318 [51:43<20:12:19,  6.16s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 505/12318 [51:46<20:11:07,  6.15s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 505/12318 [51:46<20:11:07,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 506/12318 [51:48<20:09:20,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 506/12318 [51:48<20:09:20,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 507/12318 [51:52<20:08:21,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 507/12318 [51:52<20:08:21,  6.14s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 508/12318 [51:55<20:07:11,  6.13s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 508/12318 [51:55<20:07:11,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 509/12318 [52:03<20:07:39,  6.14s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 509/12318 [52:03<20:07:39,  6.14s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 510/12318 [52:05<20:05:53,  6.13s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 510/12318 [52:05<20:05:53,  6.13s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 511/12318 [52:12<20:06:22,  6.13s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 511/12318 [52:12<20:06:22,  6.13s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 512/12318 [53:02<20:23:03,  6.22s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 512/12318 [53:02<20:23:03,  6.22s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 513/12318 [53:09<20:23:04,  6.22s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 513/12318 [53:09<20:23:04,  6.22s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 514/12318 [53:11<20:21:29,  6.21s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 514/12318 [53:11<20:21:29,  6.21s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 515/12318 [53:15<20:20:29,  6.20s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 515/12318 [53:15<20:20:29,  6.20s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 516/12318 [53:21<20:20:31,  6.21s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 516/12318 [53:21<20:20:31,  6.21s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 517/12318 [53:23<20:18:34,  6.20s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 517/12318 [53:23<20:18:34,  6.20s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 518/12318 [53:31<20:19:24,  6.20s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 518/12318 [53:31<20:19:24,  6.20s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 519/12318 [53:40<20:20:12,  6.20s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 519/12318 [53:40<20:20:12,  6.20s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 520/12318 [53:46<20:20:14,  6.21s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 520/12318 [53:46<20:20:14,  6.21s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 521/12318 [53:52<20:19:53,  6.20s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 521/12318 [53:52<20:19:53,  6.20s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 522/12318 [53:59<20:19:56,  6.21s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   4%| | 522/12318 [53:59<20:19:56,  6.21s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 523/12318 [54:06<20:20:22,  6.21s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 523/12318 [54:06<20:20:22,  6.21s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 524/12318 [54:13<20:20:24,  6.21s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 524/12318 [54:13<20:20:24,  6.21s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 525/12318 [54:15<20:18:41,  6.20s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 525/12318 [54:15<20:18:41,  6.20s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 526/12318 [54:18<20:17:32,  6.20s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 526/12318 [54:18<20:17:32,  6.20s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 527/12318 [54:22<20:16:24,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 527/12318 [54:22<20:16:24,  6.19s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 528/12318 [54:27<20:16:03,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 528/12318 [54:27<20:16:03,  6.19s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 529/12318 [54:31<20:15:06,  6.18s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 529/12318 [54:31<20:15:06,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 530/12318 [54:37<20:14:45,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 530/12318 [54:37<20:14:45,  6.18s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 531/12318 [54:41<20:14:12,  6.18s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   4%| | 531/12318 [54:41<20:14:12,  6.18s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 532/12318 [54:50<20:15:05,  6.19s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 532/12318 [54:50<20:15:05,  6.19s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 533/12318 [54:52<20:13:24,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 533/12318 [54:52<20:13:24,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 534/12318 [54:55<20:12:06,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 534/12318 [54:55<20:12:06,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 535/12318 [55:03<20:12:33,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 535/12318 [55:03<20:12:33,  6.17s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 536/12318 [55:09<20:12:37,  6.18s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 536/12318 [55:09<20:12:37,  6.18s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 537/12318 [55:17<20:13:05,  6.18s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 537/12318 [55:17<20:13:05,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 538/12318 [55:21<20:12:10,  6.17s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   4%| | 538/12318 [55:21<20:12:10,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 539/12318 [55:26<20:11:38,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 539/12318 [55:26<20:11:38,  6.17s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 540/12318 [55:35<20:12:31,  6.18s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 540/12318 [55:35<20:12:31,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 541/12318 [55:42<20:12:34,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   4%| | 541/12318 [55:42<20:12:34,  6.18s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 542/12318 [55:47<20:12:01,  6.18s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 542/12318 [55:47<20:12:01,  6.18s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 543/12318 [55:50<20:10:55,  6.17s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 543/12318 [55:50<20:10:55,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 544/12318 [56:23<20:20:37,  6.22s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 544/12318 [56:23<20:20:37,  6.22s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 545/12318 [56:26<20:19:19,  6.21s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 545/12318 [56:26<20:19:19,  6.21s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 546/12318 [56:33<20:19:23,  6.22s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 546/12318 [56:33<20:19:23,  6.22s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 547/12318 [56:35<20:17:56,  6.21s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 547/12318 [56:35<20:17:56,  6.21s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 548/12318 [56:38<20:16:38,  6.20s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 548/12318 [56:38<20:16:38,  6.20s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 549/12318 [56:43<20:16:06,  6.20s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 549/12318 [56:43<20:16:06,  6.20s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 550/12318 [56:50<20:16:08,  6.20s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   4%| | 550/12318 [56:50<20:16:08,  6.20s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 551/12318 [56:53<20:15:02,  6.20s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   4%| | 551/12318 [56:53<20:15:02,  6.20s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 552/12318 [56:55<20:13:23,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 552/12318 [56:55<20:13:23,  6.19s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 553/12318 [57:02<20:13:27,  6.19s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   4%| | 553/12318 [57:02<20:13:27,  6.19s/it, v_num=e4xv, train/loss=3."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   4%| | 554/12318 [57:08<20:13:31,  6.19s/it, v_num=e4xv, train/loss=3.\r",
-      "Epoch 0:   4%| | 554/12318 [57:08<20:13:31,  6.19s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 555/12318 [57:14<20:13:12,  6.19s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   5%| | 555/12318 [57:14<20:13:12,  6.19s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 556/12318 [57:17<20:12:09,  6.18s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   5%| | 556/12318 [57:17<20:12:09,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 557/12318 [57:22<20:11:38,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 557/12318 [57:22<20:11:38,  6.18s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 558/12318 [57:31<20:12:26,  6.19s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   5%| | 558/12318 [57:31<20:12:26,  6.19s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 559/12318 [57:39<20:12:49,  6.19s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   5%| | 559/12318 [57:39<20:12:49,  6.19s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 560/12318 [57:44<20:12:17,  6.19s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   5%| | 560/12318 [57:44<20:12:17,  6.19s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 561/12318 [57:46<20:10:40,  6.18s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   5%| | 561/12318 [57:46<20:10:40,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 562/12318 [57:51<20:10:19,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 562/12318 [57:51<20:10:19,  6.18s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 563/12318 [58:00<20:11:03,  6.18s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   5%| | 563/12318 [58:00<20:11:03,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 564/12318 [58:04<20:10:23,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   5%| | 564/12318 [58:04<20:10:23,  6.18s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 565/12318 [58:06<20:08:47,  6.17s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 565/12318 [58:06<20:08:47,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 566/12318 [58:15<20:09:35,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 566/12318 [58:15<20:09:35,  6.18s/it, v_num=e4xv, train/loss=6."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 567/12318 [58:22<20:09:58,  6.18s/it, v_num=e4xv, train/loss=6.\r",
-      "Epoch 0:   5%| | 567/12318 [58:22<20:09:58,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 568/12318 [58:27<20:09:27,  6.18s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   5%| | 568/12318 [58:27<20:09:27,  6.18s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 569/12318 [58:35<20:09:52,  6.18s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   5%| | 569/12318 [58:35<20:09:52,  6.18s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 570/12318 [58:36<20:08:06,  6.17s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   5%| | 570/12318 [58:36<20:08:06,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 571/12318 [58:40<20:07:13,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 571/12318 [58:40<20:07:14,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 572/12318 [58:42<20:05:27,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 572/12318 [58:42<20:05:27,  6.16s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 573/12318 [58:48<20:05:29,  6.16s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 573/12318 [58:48<20:05:29,  6.16s/it, v_num=e4xv, train/loss=4."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 574/12318 [58:53<20:04:48,  6.16s/it, v_num=e4xv, train/loss=4.\r",
-      "Epoch 0:   5%| | 574/12318 [58:53<20:04:48,  6.16s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 575/12318 [58:56<20:03:46,  6.15s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   5%| | 575/12318 [58:56<20:03:46,  6.15s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 576/12318 [59:33<20:14:02,  6.20s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 576/12318 [59:33<20:14:02,  6.20s/it, v_num=e4xv, train/loss=5."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 577/12318 [59:38<20:13:31,  6.20s/it, v_num=e4xv, train/loss=5.\r",
-      "Epoch 0:   5%| | 577/12318 [59:38<20:13:31,  6.20s/it, v_num=e4xv, train/loss=1."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 578/12318 [59:41<20:12:28,  6.20s/it, v_num=e4xv, train/loss=1.\r",
-      "Epoch 0:   5%| | 578/12318 [59:41<20:12:28,  6.20s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 579/12318 [59:47<20:12:07,  6.20s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 579/12318 [59:47<20:12:07,  6.20s/it, v_num=e4xv, train/loss=2."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 580/12318 [59:48<20:10:33,  6.19s/it, v_num=e4xv, train/loss=2.\r",
-      "Epoch 0:   5%| | 580/12318 [59:48<20:10:33,  6.19s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 581/12318 [59:51<20:09:20,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 581/12318 [59:51<20:09:20,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 582/12318 [59:55<20:08:28,  6.18s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 582/12318 [59:55<20:08:28,  6.18s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 583/12318 [59:59<20:07:27,  6.17s/it, v_num=e4xv, train/loss=0.\r",
-      "Epoch 0:   5%| | 583/12318 [59:59<20:07:27,  6.17s/it, v_num=e4xv, train/loss=0."
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 584/12318 [1:00:07<20:08:11,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 584/12318 [1:00:07<20:08:11,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 585/12318 [1:00:16<20:08:54,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 585/12318 [1:00:16<20:08:54,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 586/12318 [1:00:17<20:07:10,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 586/12318 [1:00:17<20:07:10,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 587/12318 [1:00:22<20:06:40,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 587/12318 [1:00:22<20:06:40,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 588/12318 [1:00:29<20:06:42,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 588/12318 [1:00:29<20:06:42,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 589/12318 [1:00:38<20:07:27,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 589/12318 [1:00:38<20:07:27,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 590/12318 [1:00:42<20:06:37,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 590/12318 [1:00:42<20:06:37,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 591/12318 [1:00:50<20:07:21,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 591/12318 [1:00:50<20:07:21,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 592/12318 [1:00:59<20:08:04,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 592/12318 [1:00:59<20:08:04,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 593/12318 [1:01:08<20:08:46,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 593/12318 [1:01:08<20:08:46,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 594/12318 [1:01:15<20:09:08,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 594/12318 [1:01:15<20:09:08,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 595/12318 [1:01:22<20:09:07,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 595/12318 [1:01:22<20:09:07,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 596/12318 [1:01:29<20:09:28,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 596/12318 [1:01:29<20:09:28,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 597/12318 [1:01:34<20:08:57,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 597/12318 [1:01:34<20:08:57,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 598/12318 [1:01:42<20:09:18,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 598/12318 [1:01:42<20:09:18,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 599/12318 [1:01:51<20:10:03,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 599/12318 [1:01:51<20:10:03,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 600/12318 [1:01:58<20:10:27,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 600/12318 [1:01:58<20:10:27,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 601/12318 [1:02:06<20:10:50,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 601/12318 [1:02:06<20:10:50,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 602/12318 [1:02:11<20:10:20,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 602/12318 [1:02:11<20:10:20,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 603/12318 [1:02:16<20:09:59,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 603/12318 [1:02:16<20:09:59,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 604/12318 [1:02:22<20:09:40,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 604/12318 [1:02:22<20:09:40,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 605/12318 [1:02:30<20:10:03,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 605/12318 [1:02:30<20:10:03,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 606/12318 [1:02:34<20:09:23,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 606/12318 [1:02:34<20:09:23,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 607/12318 [1:02:38<20:08:34,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 607/12318 [1:02:38<20:08:34,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 608/12318 [1:02:52<20:10:51,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 608/12318 [1:02:52<20:10:51,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 609/12318 [1:02:54<20:09:32,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 609/12318 [1:02:54<20:09:32,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 610/12318 [1:02:59<20:08:52,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 610/12318 [1:02:59<20:08:52,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 611/12318 [1:03:04<20:08:33,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 611/12318 [1:03:04<20:08:33,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 612/12318 [1:03:13<20:09:14,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 612/12318 [1:03:13<20:09:14,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 613/12318 [1:03:20<20:09:34,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 613/12318 [1:03:20<20:09:34,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 614/12318 [1:03:28<20:09:55,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 614/12318 [1:03:28<20:09:55,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 615/12318 [1:03:37<20:10:36,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 615/12318 [1:03:37<20:10:36,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 616/12318 [1:03:42<20:10:16,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 616/12318 [1:03:42<20:10:16,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 617/12318 [1:03:44<20:08:47,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 617/12318 [1:03:44<20:08:47,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 618/12318 [1:03:49<20:08:28,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 618/12318 [1:03:49<20:08:28,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 619/12318 [1:03:56<20:08:30,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 619/12318 [1:03:56<20:08:30,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 620/12318 [1:04:03<20:08:31,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 620/12318 [1:04:03<20:08:31,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 621/12318 [1:04:04<20:06:53,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 621/12318 [1:04:04<20:06:53,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 622/12318 [1:04:10<20:06:52,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 622/12318 [1:04:10<20:06:52,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 623/12318 [1:04:14<20:06:04,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 623/12318 [1:04:14<20:06:04,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 624/12318 [1:04:16<20:04:26,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 624/12318 [1:04:16<20:04:26,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 625/12318 [1:04:19<20:03:18,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 625/12318 [1:04:19<20:03:18,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 626/12318 [1:04:23<20:02:48,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 626/12318 [1:04:23<20:02:48,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 627/12318 [1:04:30<20:02:49,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 627/12318 [1:04:30<20:02:49,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 628/12318 [1:04:33<20:01:42,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 628/12318 [1:04:33<20:01:42,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 629/12318 [1:04:40<20:01:46,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 629/12318 [1:04:40<20:01:46,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 630/12318 [1:04:48<20:02:28,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 630/12318 [1:04:48<20:02:28,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 631/12318 [1:04:53<20:01:50,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 631/12318 [1:04:53<20:01:50,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 632/12318 [1:05:01<20:02:13,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 632/12318 [1:05:01<20:02:13,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 633/12318 [1:05:06<20:01:57,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 633/12318 [1:05:06<20:01:57,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 634/12318 [1:05:11<20:01:31,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 634/12318 [1:05:11<20:01:31,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 635/12318 [1:05:20<20:02:13,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 635/12318 [1:05:20<20:02:13,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 636/12318 [1:05:21<20:00:38,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 636/12318 [1:05:21<20:00:38,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 637/12318 [1:05:27<20:00:20,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 637/12318 [1:05:27<20:00:20,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 638/12318 [1:05:30<19:59:16,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 638/12318 [1:05:30<19:59:16,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 639/12318 [1:05:39<20:00:00,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 639/12318 [1:05:39<20:00:00,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 640/12318 [1:06:11<20:07:40,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 640/12318 [1:06:11<20:07:40,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 641/12318 [1:06:19<20:08:21,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 641/12318 [1:06:19<20:08:21,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 642/12318 [1:06:25<20:08:03,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 642/12318 [1:06:25<20:08:03,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 643/12318 [1:06:26<20:06:28,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 643/12318 [1:06:26<20:06:28,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 644/12318 [1:06:30<20:05:31,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 644/12318 [1:06:30<20:05:31,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 645/12318 [1:06:32<20:04:07,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 645/12318 [1:06:32<20:04:07,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 646/12318 [1:06:34<20:02:51,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 646/12318 [1:06:34<20:02:52,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 647/12318 [1:06:37<20:01:56,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 647/12318 [1:06:37<20:01:56,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 648/12318 [1:06:42<20:01:19,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 648/12318 [1:06:42<20:01:19,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 649/12318 [1:06:47<20:00:52,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 649/12318 [1:06:47<20:00:52,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 650/12318 [1:06:55<20:01:15,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 650/12318 [1:06:55<20:01:15,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 651/12318 [1:07:03<20:01:56,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 651/12318 [1:07:03<20:01:56,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 652/12318 [1:07:05<20:00:24,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 652/12318 [1:07:05<20:00:24,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 653/12318 [1:07:08<19:59:29,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 653/12318 [1:07:08<19:59:29,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 654/12318 [1:07:13<19:59:03,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 654/12318 [1:07:13<19:59:03,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 655/12318 [1:07:17<19:58:17,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 655/12318 [1:07:17<19:58:17,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 656/12318 [1:07:21<19:57:31,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 656/12318 [1:07:21<19:57:31,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 657/12318 [1:07:29<19:57:53,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 657/12318 [1:07:29<19:57:53,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 658/12318 [1:07:37<19:58:16,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 658/12318 [1:07:37<19:58:16,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 659/12318 [1:07:40<19:57:23,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 659/12318 [1:07:40<19:57:23,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 660/12318 [1:07:43<19:56:19,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 660/12318 [1:07:43<19:56:19,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 661/12318 [1:07:47<19:55:25,  6.15s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 661/12318 [1:07:47<19:55:25,  6.15s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 662/12318 [1:07:52<19:55:08,  6.15s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 662/12318 [1:07:52<19:55:08,  6.15s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 663/12318 [1:07:56<19:54:14,  6.15s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 663/12318 [1:07:56<19:54:14,  6.15s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 664/12318 [1:08:00<19:53:38,  6.15s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 664/12318 [1:08:00<19:53:38,  6.15s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 665/12318 [1:08:06<19:53:21,  6.14s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 665/12318 [1:08:06<19:53:21,  6.14s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 666/12318 [1:08:09<19:52:28,  6.14s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 666/12318 [1:08:09<19:52:28,  6.14s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 667/12318 [1:08:12<19:51:26,  6.14s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 667/12318 [1:08:12<19:51:26,  6.14s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 668/12318 [1:08:19<19:51:29,  6.14s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 668/12318 [1:08:19<19:51:29,  6.14s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 669/12318 [1:08:26<19:51:50,  6.14s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 669/12318 [1:08:26<19:51:50,  6.14s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 670/12318 [1:08:35<19:52:29,  6.14s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 670/12318 [1:08:35<19:52:29,  6.14s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 671/12318 [1:08:41<19:52:12,  6.14s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 671/12318 [1:08:41<19:52:12,  6.14s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 672/12318 [1:09:17<20:00:58,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 672/12318 [1:09:17<20:00:58,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 673/12318 [1:09:22<20:00:24,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 673/12318 [1:09:22<20:00:24,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 674/12318 [1:09:30<20:00:44,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 674/12318 [1:09:30<20:00:44,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 675/12318 [1:09:36<20:00:46,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 675/12318 [1:09:36<20:00:46,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 676/12318 [1:09:45<20:01:22,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 676/12318 [1:09:45<20:01:22,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   5%| | 677/12318 [1:09:54<20:01:59,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   5%| | 677/12318 [1:09:54<20:01:59,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 678/12318 [1:09:59<20:01:41,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 678/12318 [1:09:59<20:01:41,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 679/12318 [1:10:04<20:01:14,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 679/12318 [1:10:04<20:01:14,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 680/12318 [1:10:11<20:01:14,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 680/12318 [1:10:11<20:01:14,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 681/12318 [1:10:19<20:01:49,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 681/12318 [1:10:19<20:01:49,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 682/12318 [1:10:24<20:01:13,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 682/12318 [1:10:24<20:01:13,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 683/12318 [1:10:29<20:00:55,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 683/12318 [1:10:29<20:00:55,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 684/12318 [1:10:35<20:00:39,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 684/12318 [1:10:35<20:00:39,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 685/12318 [1:10:37<19:59:19,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 685/12318 [1:10:37<19:59:19,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 686/12318 [1:10:45<19:59:39,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 686/12318 [1:10:45<19:59:39,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 687/12318 [1:10:53<20:00:17,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 687/12318 [1:10:53<20:00:17,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 688/12318 [1:10:55<19:58:58,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 688/12318 [1:10:55<19:58:58,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 689/12318 [1:11:04<19:59:34,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 689/12318 [1:11:04<19:59:34,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 690/12318 [1:11:05<19:58:06,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 690/12318 [1:11:05<19:58:06,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 691/12318 [1:11:09<19:57:13,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 691/12318 [1:11:09<19:57:13,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 692/12318 [1:11:13<19:56:39,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 692/12318 [1:11:13<19:56:39,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 693/12318 [1:11:17<19:55:58,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 693/12318 [1:11:17<19:55:58,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 694/12318 [1:11:26<19:56:33,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 694/12318 [1:11:26<19:56:33,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 695/12318 [1:11:32<19:56:33,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 695/12318 [1:11:32<19:56:33,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 696/12318 [1:11:35<19:55:33,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 696/12318 [1:11:35<19:55:33,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 697/12318 [1:11:43<19:55:52,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 697/12318 [1:11:43<19:55:52,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 698/12318 [1:11:49<19:55:35,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 698/12318 [1:11:49<19:55:35,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 699/12318 [1:11:54<19:55:18,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 699/12318 [1:11:54<19:55:18,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 700/12318 [1:12:03<19:55:53,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 700/12318 [1:12:03<19:55:53,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 701/12318 [1:12:09<19:55:53,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 701/12318 [1:12:09<19:55:53,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 702/12318 [1:12:12<19:54:52,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 702/12318 [1:12:12<19:54:52,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 703/12318 [1:12:17<19:54:17,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 703/12318 [1:12:17<19:54:17,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 704/12318 [1:12:41<19:59:16,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 704/12318 [1:12:41<19:59:16,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 705/12318 [1:12:46<19:58:50,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 705/12318 [1:12:46<19:58:50,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 706/12318 [1:12:51<19:58:16,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 706/12318 [1:12:51<19:58:16,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 707/12318 [1:12:53<19:57:06,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 707/12318 [1:12:53<19:57:06,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 708/12318 [1:12:59<19:56:49,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 708/12318 [1:12:59<19:56:49,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 709/12318 [1:13:03<19:56:06,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 709/12318 [1:13:03<19:56:06,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 710/12318 [1:13:10<19:56:23,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 710/12318 [1:13:10<19:56:23,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 711/12318 [1:13:17<19:56:25,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 711/12318 [1:13:17<19:56:25,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 712/12318 [1:13:20<19:55:33,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 712/12318 [1:13:20<19:55:33,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 713/12318 [1:13:22<19:54:08,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 713/12318 [1:13:22<19:54:08,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 714/12318 [1:13:30<19:54:41,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 714/12318 [1:13:30<19:54:41,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 715/12318 [1:13:39<19:55:19,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 715/12318 [1:13:39<19:55:19,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 716/12318 [1:13:48<19:55:52,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 716/12318 [1:13:48<19:55:52,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 717/12318 [1:13:56<19:56:25,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 717/12318 [1:13:56<19:56:25,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 718/12318 [1:14:05<19:56:59,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 718/12318 [1:14:05<19:56:59,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 719/12318 [1:14:10<19:56:41,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 719/12318 [1:14:10<19:56:41,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 720/12318 [1:14:17<19:56:41,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 720/12318 [1:14:17<19:56:41,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 721/12318 [1:14:25<19:56:58,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 721/12318 [1:14:25<19:56:58,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 722/12318 [1:14:33<19:57:30,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 722/12318 [1:14:33<19:57:30,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 723/12318 [1:14:42<19:58:02,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 723/12318 [1:14:42<19:58:02,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 724/12318 [1:14:47<19:57:45,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 724/12318 [1:14:47<19:57:45,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 725/12318 [1:14:50<19:56:38,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 725/12318 [1:14:50<19:56:38,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 726/12318 [1:14:57<19:56:54,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 726/12318 [1:14:57<19:56:54,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 727/12318 [1:15:02<19:56:30,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 727/12318 [1:15:02<19:56:30,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 728/12318 [1:15:04<19:55:06,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 728/12318 [1:15:04<19:55:06,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 729/12318 [1:15:05<19:53:43,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 729/12318 [1:15:05<19:53:43,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 730/12318 [1:15:08<19:52:53,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 730/12318 [1:15:08<19:52:53,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 731/12318 [1:15:15<19:52:54,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 731/12318 [1:15:15<19:52:54,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 732/12318 [1:15:23<19:53:09,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 732/12318 [1:15:23<19:53:09,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 733/12318 [1:15:29<19:53:09,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 733/12318 [1:15:29<19:53:09,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 734/12318 [1:15:36<19:53:08,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 734/12318 [1:15:36<19:53:08,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 735/12318 [1:15:44<19:53:43,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 735/12318 [1:15:44<19:53:43,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 736/12318 [1:16:01<19:56:16,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 736/12318 [1:16:01<19:56:16,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 737/12318 [1:16:09<19:56:48,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 737/12318 [1:16:09<19:56:48,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 738/12318 [1:16:12<19:55:41,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 738/12318 [1:16:12<19:55:41,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 739/12318 [1:16:15<19:54:51,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 739/12318 [1:16:15<19:54:51,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 740/12318 [1:16:18<19:53:52,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 740/12318 [1:16:18<19:53:52,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 741/12318 [1:16:22<19:53:18,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 741/12318 [1:16:22<19:53:18,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 742/12318 [1:16:30<19:53:33,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 742/12318 [1:16:30<19:53:33,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 743/12318 [1:16:33<19:52:35,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 743/12318 [1:16:33<19:52:35,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 744/12318 [1:16:38<19:52:10,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 744/12318 [1:16:38<19:52:10,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 745/12318 [1:16:43<19:51:53,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 745/12318 [1:16:43<19:51:53,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 746/12318 [1:16:52<19:52:24,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 746/12318 [1:16:52<19:52:24,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 747/12318 [1:16:55<19:51:26,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 747/12318 [1:16:55<19:51:26,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 748/12318 [1:16:57<19:50:29,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 748/12318 [1:16:57<19:50:29,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 749/12318 [1:17:01<19:49:48,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 749/12318 [1:17:01<19:49:48,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 750/12318 [1:17:09<19:50:04,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 750/12318 [1:17:09<19:50:04,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 751/12318 [1:17:17<19:50:20,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 751/12318 [1:17:17<19:50:20,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 752/12318 [1:17:22<19:49:56,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 752/12318 [1:17:22<19:49:56,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 753/12318 [1:17:27<19:49:40,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 753/12318 [1:17:27<19:49:40,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 754/12318 [1:17:30<19:48:43,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 754/12318 [1:17:30<19:48:43,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 755/12318 [1:17:37<19:48:43,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 755/12318 [1:17:37<19:48:43,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 756/12318 [1:17:44<19:48:58,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 756/12318 [1:17:44<19:48:58,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 757/12318 [1:17:53<19:49:30,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 757/12318 [1:17:53<19:49:30,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 758/12318 [1:18:00<19:49:47,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 758/12318 [1:18:00<19:49:47,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 759/12318 [1:18:03<19:48:45,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 759/12318 [1:18:03<19:48:45,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 760/12318 [1:18:08<19:48:23,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 760/12318 [1:18:08<19:48:23,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 761/12318 [1:18:12<19:47:44,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 761/12318 [1:18:12<19:47:44,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 762/12318 [1:18:20<19:48:01,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 762/12318 [1:18:20<19:48:01,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 763/12318 [1:18:24<19:47:29,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 763/12318 [1:18:24<19:47:29,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 764/12318 [1:18:33<19:48:02,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 764/12318 [1:18:33<19:48:02,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 765/12318 [1:18:37<19:47:22,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 765/12318 [1:18:37<19:47:22,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 766/12318 [1:18:43<19:47:21,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 766/12318 [1:18:43<19:47:21,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 767/12318 [1:18:47<19:46:33,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 767/12318 [1:18:47<19:46:33,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 768/12318 [1:19:12<19:51:17,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 768/12318 [1:19:12<19:51:17,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 769/12318 [1:19:14<19:50:05,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 769/12318 [1:19:14<19:50:05,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 770/12318 [1:19:23<19:50:36,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 770/12318 [1:19:23<19:50:36,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 771/12318 [1:19:29<19:50:35,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 771/12318 [1:19:29<19:50:35,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 772/12318 [1:19:38<19:51:06,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 772/12318 [1:19:38<19:51:06,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 773/12318 [1:19:43<19:50:48,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 773/12318 [1:19:43<19:50:48,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 774/12318 [1:19:45<19:49:29,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 774/12318 [1:19:45<19:49:29,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 775/12318 [1:19:53<19:49:59,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 775/12318 [1:19:53<19:49:59,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 776/12318 [1:19:56<19:48:55,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 776/12318 [1:19:56<19:48:55,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 777/12318 [1:20:03<19:49:09,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 777/12318 [1:20:03<19:49:09,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 778/12318 [1:20:07<19:48:28,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 778/12318 [1:20:07<19:48:28,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 779/12318 [1:20:14<19:48:42,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 779/12318 [1:20:14<19:48:42,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 780/12318 [1:20:16<19:47:30,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 780/12318 [1:20:16<19:47:30,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 781/12318 [1:20:24<19:47:45,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 781/12318 [1:20:24<19:47:45,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 782/12318 [1:20:26<19:46:35,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 782/12318 [1:20:26<19:46:35,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 783/12318 [1:20:34<19:47:05,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 783/12318 [1:20:34<19:47:05,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 784/12318 [1:20:41<19:47:05,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 784/12318 [1:20:41<19:47:05,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 785/12318 [1:20:44<19:46:18,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 785/12318 [1:20:44<19:46:18,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 786/12318 [1:20:49<19:45:54,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 786/12318 [1:20:49<19:45:54,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 787/12318 [1:20:54<19:45:29,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 787/12318 [1:20:54<19:45:29,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 788/12318 [1:20:56<19:44:27,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 788/12318 [1:20:56<19:44:27,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 789/12318 [1:21:04<19:44:42,  6.17s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 789/12318 [1:21:04<19:44:42,  6.17s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 790/12318 [1:21:09<19:44:11,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 790/12318 [1:21:09<19:44:11,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 791/12318 [1:21:11<19:43:11,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 791/12318 [1:21:11<19:43:11,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 792/12318 [1:21:16<19:42:54,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 792/12318 [1:21:16<19:42:54,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 793/12318 [1:21:24<19:43:10,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 793/12318 [1:21:24<19:43:10,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 794/12318 [1:21:33<19:43:43,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 794/12318 [1:21:33<19:43:43,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 795/12318 [1:21:35<19:42:35,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 795/12318 [1:21:35<19:42:35,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 796/12318 [1:21:41<19:42:35,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 796/12318 [1:21:41<19:42:35,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 797/12318 [1:21:49<19:42:50,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 797/12318 [1:21:49<19:42:50,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 798/12318 [1:21:54<19:42:26,  6.16s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 798/12318 [1:21:54<19:42:26,  6.16s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 799/12318 [1:21:55<19:41:10,  6.15s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 799/12318 [1:21:55<19:41:10,  6.15s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   6%| | 800/12318 [1:22:49<19:52:26,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   6%| | 800/12318 [1:22:49<19:52:26,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
-      "  warnings.warn(\r\n",
-      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
-      "  warnings.warn(\r\n",
-      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
-      "  warnings.warn(\r\n",
-      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
-      "  warnings.warn(\r\n",
-      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
-      "  warnings.warn(\r\n",
-      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
-      "  warnings.warn(\r\n",
-      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
-      "  warnings.warn(\r\n",
-      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
-      "  warnings.warn(\r\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 801/12318 [1:23:11<19:56:03,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 801/12318 [1:23:11<19:56:03,  6.23s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 802/12318 [1:23:18<19:56:17,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 802/12318 [1:23:18<19:56:17,  6.23s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 803/12318 [1:23:25<19:56:16,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 803/12318 [1:23:25<19:56:16,  6.23s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 804/12318 [1:23:26<19:55:00,  6.23s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 804/12318 [1:23:26<19:55:00,  6.23s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 805/12318 [1:23:30<19:54:14,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 805/12318 [1:23:30<19:54:14,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 806/12318 [1:23:34<19:53:37,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 806/12318 [1:23:34<19:53:37,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 807/12318 [1:23:40<19:53:37,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 807/12318 [1:23:40<19:53:37,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 808/12318 [1:23:46<19:53:21,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 808/12318 [1:23:46<19:53:21,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 809/12318 [1:23:54<19:53:48,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 809/12318 [1:23:54<19:53:48,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 810/12318 [1:23:59<19:53:23,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 810/12318 [1:23:59<19:53:23,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 811/12318 [1:24:06<19:53:22,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 811/12318 [1:24:06<19:53:22,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 812/12318 [1:24:13<19:53:22,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 812/12318 [1:24:13<19:53:22,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 813/12318 [1:24:20<19:53:35,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 813/12318 [1:24:20<19:53:35,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 814/12318 [1:24:28<19:53:48,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 814/12318 [1:24:28<19:53:48,  6.23s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 815/12318 [1:24:30<19:52:40,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 815/12318 [1:24:30<19:52:40,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 816/12318 [1:24:32<19:51:46,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 816/12318 [1:24:32<19:51:46,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 817/12318 [1:24:39<19:51:45,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 817/12318 [1:24:39<19:51:45,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 818/12318 [1:24:45<19:51:28,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 818/12318 [1:24:45<19:51:28,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 819/12318 [1:24:50<19:51:12,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 819/12318 [1:24:50<19:51:12,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 820/12318 [1:24:55<19:50:48,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 820/12318 [1:24:55<19:50:48,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 821/12318 [1:24:58<19:50:02,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 821/12318 [1:24:58<19:50:02,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 822/12318 [1:25:06<19:50:16,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 822/12318 [1:25:06<19:50:16,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 823/12318 [1:25:14<19:50:30,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 823/12318 [1:25:14<19:50:30,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 824/12318 [1:25:19<19:50:13,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 824/12318 [1:25:19<19:50:13,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 825/12318 [1:25:28<19:50:40,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 825/12318 [1:25:28<19:50:40,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 826/12318 [1:25:31<19:49:47,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 826/12318 [1:25:31<19:49:47,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 827/12318 [1:25:33<19:48:53,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 827/12318 [1:25:33<19:48:53,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 828/12318 [1:25:37<19:48:08,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 828/12318 [1:25:37<19:48:08,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 829/12318 [1:25:38<19:46:54,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 829/12318 [1:25:38<19:46:54,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 830/12318 [1:25:47<19:47:24,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 830/12318 [1:25:47<19:47:24,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 831/12318 [1:25:54<19:47:37,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 831/12318 [1:25:54<19:47:37,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 832/12318 [1:26:21<19:52:14,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 832/12318 [1:26:21<19:52:14,  6.23s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 833/12318 [1:26:24<19:51:22,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 833/12318 [1:26:24<19:51:22,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 834/12318 [1:26:32<19:51:37,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 834/12318 [1:26:32<19:51:37,  6.23s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 835/12318 [1:26:37<19:51:21,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 835/12318 [1:26:37<19:51:21,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 836/12318 [1:26:44<19:51:19,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 836/12318 [1:26:44<19:51:19,  6.23s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 837/12318 [1:26:45<19:50:06,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 837/12318 [1:26:45<19:50:06,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 838/12318 [1:26:48<19:49:14,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 838/12318 [1:26:48<19:49:14,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 839/12318 [1:26:50<19:48:15,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 839/12318 [1:26:50<19:48:15,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 840/12318 [1:26:56<19:48:00,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 840/12318 [1:26:56<19:48:00,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 841/12318 [1:27:03<19:47:58,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 841/12318 [1:27:03<19:47:58,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 842/12318 [1:27:06<19:47:20,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 842/12318 [1:27:06<19:47:20,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 843/12318 [1:27:14<19:47:33,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 843/12318 [1:27:14<19:47:33,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 844/12318 [1:27:23<19:47:59,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 844/12318 [1:27:23<19:47:59,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 845/12318 [1:27:29<19:47:57,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 845/12318 [1:27:29<19:47:57,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 846/12318 [1:27:31<19:46:52,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 846/12318 [1:27:31<19:46:52,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 847/12318 [1:27:37<19:46:36,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 847/12318 [1:27:37<19:46:36,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 848/12318 [1:27:44<19:46:49,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 848/12318 [1:27:44<19:46:49,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 849/12318 [1:27:51<19:46:48,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 849/12318 [1:27:51<19:46:48,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 850/12318 [1:27:52<19:45:36,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 850/12318 [1:27:52<19:45:36,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 851/12318 [1:28:00<19:45:48,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 851/12318 [1:28:00<19:45:48,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 852/12318 [1:28:03<19:45:04,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 852/12318 [1:28:03<19:45:04,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 853/12318 [1:28:07<19:44:28,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 853/12318 [1:28:07<19:44:28,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 854/12318 [1:28:13<19:44:13,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 854/12318 [1:28:13<19:44:13,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 855/12318 [1:28:20<19:44:25,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 855/12318 [1:28:20<19:44:26,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 856/12318 [1:28:28<19:44:38,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 856/12318 [1:28:28<19:44:38,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 857/12318 [1:28:35<19:44:51,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 857/12318 [1:28:35<19:44:51,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 858/12318 [1:28:43<19:45:03,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 858/12318 [1:28:43<19:45:03,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 859/12318 [1:28:49<19:44:49,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 859/12318 [1:28:49<19:44:49,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 860/12318 [1:28:51<19:43:52,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 860/12318 [1:28:51<19:43:52,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 861/12318 [1:29:00<19:44:18,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 861/12318 [1:29:00<19:44:18,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 862/12318 [1:29:07<19:44:30,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 862/12318 [1:29:07<19:44:30,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 863/12318 [1:29:09<19:43:20,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 863/12318 [1:29:09<19:43:20,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 864/12318 [1:29:29<19:46:17,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 864/12318 [1:29:29<19:46:17,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 865/12318 [1:29:34<19:46:02,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 865/12318 [1:29:34<19:46:02,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 866/12318 [1:29:42<19:46:15,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 866/12318 [1:29:42<19:46:15,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 867/12318 [1:29:47<19:45:59,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 867/12318 [1:29:47<19:45:59,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 868/12318 [1:29:49<19:44:55,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 868/12318 [1:29:49<19:44:55,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 869/12318 [1:29:54<19:44:26,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 869/12318 [1:29:54<19:44:26,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 870/12318 [1:30:00<19:44:25,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 870/12318 [1:30:00<19:44:25,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 871/12318 [1:30:02<19:43:21,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 871/12318 [1:30:02<19:43:21,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 872/12318 [1:30:10<19:43:34,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 872/12318 [1:30:10<19:43:34,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 873/12318 [1:30:15<19:43:12,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 873/12318 [1:30:15<19:43:12,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 874/12318 [1:30:22<19:43:24,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 874/12318 [1:30:22<19:43:24,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 875/12318 [1:30:31<19:43:51,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 875/12318 [1:30:31<19:43:51,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 876/12318 [1:30:32<19:42:41,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 876/12318 [1:30:32<19:42:41,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 877/12318 [1:30:39<19:42:40,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 877/12318 [1:30:39<19:42:40,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 878/12318 [1:30:44<19:42:24,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 878/12318 [1:30:44<19:42:24,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 879/12318 [1:30:49<19:42:03,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 879/12318 [1:30:49<19:42:03,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 880/12318 [1:30:53<19:41:20,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 880/12318 [1:30:53<19:41:20,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 881/12318 [1:30:56<19:40:37,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 881/12318 [1:30:56<19:40:37,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 882/12318 [1:30:57<19:39:27,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 882/12318 [1:30:57<19:39:27,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 883/12318 [1:31:05<19:39:39,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 883/12318 [1:31:05<19:39:39,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 884/12318 [1:31:06<19:38:29,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 884/12318 [1:31:06<19:38:29,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 885/12318 [1:31:13<19:38:27,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 885/12318 [1:31:13<19:38:27,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 886/12318 [1:31:21<19:38:53,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 886/12318 [1:31:21<19:38:53,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 887/12318 [1:31:28<19:38:51,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 887/12318 [1:31:28<19:38:51,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 888/12318 [1:31:37<19:39:16,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 888/12318 [1:31:37<19:39:16,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 889/12318 [1:31:39<19:38:20,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 889/12318 [1:31:39<19:38:20,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 890/12318 [1:31:42<19:37:31,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 890/12318 [1:31:42<19:37:31,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 891/12318 [1:31:46<19:37:03,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 891/12318 [1:31:46<19:37:03,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 892/12318 [1:31:53<19:37:01,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 892/12318 [1:31:53<19:37:01,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 893/12318 [1:32:00<19:37:13,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 893/12318 [1:32:00<19:37:13,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 894/12318 [1:32:03<19:36:18,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 894/12318 [1:32:03<19:36:18,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 895/12318 [1:32:09<19:36:17,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 895/12318 [1:32:09<19:36:17,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 896/12318 [1:32:45<19:42:27,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 896/12318 [1:32:45<19:42:27,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 897/12318 [1:32:53<19:42:39,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 897/12318 [1:32:53<19:42:39,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 898/12318 [1:32:56<19:41:57,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 898/12318 [1:32:56<19:41:57,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 899/12318 [1:33:03<19:41:55,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 899/12318 [1:33:03<19:41:55,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 900/12318 [1:33:09<19:41:52,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 900/12318 [1:33:09<19:41:52,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 901/12318 [1:33:13<19:41:23,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 901/12318 [1:33:13<19:41:23,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 902/12318 [1:33:16<19:40:28,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 902/12318 [1:33:16<19:40:28,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 903/12318 [1:33:22<19:40:26,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 903/12318 [1:33:22<19:40:26,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 904/12318 [1:33:30<19:40:37,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 904/12318 [1:33:30<19:40:37,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 905/12318 [1:33:35<19:40:16,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 905/12318 [1:33:35<19:40:16,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 906/12318 [1:33:43<19:40:28,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 906/12318 [1:33:43<19:40:28,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 907/12318 [1:33:50<19:40:40,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 907/12318 [1:33:50<19:40:40,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 908/12318 [1:33:54<19:40:05,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 908/12318 [1:33:54<19:40:05,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 909/12318 [1:33:58<19:39:30,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 909/12318 [1:33:58<19:39:30,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 910/12318 [1:34:03<19:39:14,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 910/12318 [1:34:03<19:39:14,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 911/12318 [1:34:09<19:38:59,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 911/12318 [1:34:09<19:38:59,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 912/12318 [1:34:12<19:38:17,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 912/12318 [1:34:12<19:38:17,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 913/12318 [1:34:16<19:37:36,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 913/12318 [1:34:16<19:37:36,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 914/12318 [1:34:22<19:37:35,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 914/12318 [1:34:22<19:37:35,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 915/12318 [1:34:28<19:37:20,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 915/12318 [1:34:28<19:37:20,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 916/12318 [1:34:36<19:37:44,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 916/12318 [1:34:36<19:37:44,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 917/12318 [1:34:44<19:37:56,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 917/12318 [1:34:44<19:37:56,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 918/12318 [1:34:46<19:37:02,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 918/12318 [1:34:46<19:37:02,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 919/12318 [1:34:49<19:36:08,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 919/12318 [1:34:49<19:36:08,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 920/12318 [1:34:52<19:35:21,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 920/12318 [1:34:52<19:35:21,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 921/12318 [1:34:56<19:34:53,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 921/12318 [1:34:56<19:34:53,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 922/12318 [1:35:03<19:34:52,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 922/12318 [1:35:03<19:34:52,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   7%| | 923/12318 [1:35:10<19:35:03,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   7%| | 923/12318 [1:35:10<19:35:03,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 924/12318 [1:35:13<19:34:10,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 924/12318 [1:35:13<19:34:10,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 925/12318 [1:35:19<19:34:09,  6.18s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 925/12318 [1:35:19<19:34:09,  6.18s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 926/12318 [1:35:28<19:34:33,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 926/12318 [1:35:28<19:34:33,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 927/12318 [1:35:34<19:34:30,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 927/12318 [1:35:34<19:34:30,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 928/12318 [1:36:08<19:40:05,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 928/12318 [1:36:08<19:40:05,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 929/12318 [1:36:14<19:39:51,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 929/12318 [1:36:14<19:39:51,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 930/12318 [1:36:22<19:40:13,  6.22s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 930/12318 [1:36:22<19:40:13,  6.22s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 931/12318 [1:36:24<19:39:06,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 931/12318 [1:36:24<19:39:07,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 932/12318 [1:36:29<19:38:45,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 932/12318 [1:36:29<19:38:45,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 933/12318 [1:36:37<19:39:08,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 933/12318 [1:36:37<19:39:08,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 934/12318 [1:36:43<19:38:54,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 934/12318 [1:36:43<19:38:54,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 935/12318 [1:36:45<19:37:55,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 935/12318 [1:36:45<19:37:55,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 936/12318 [1:36:50<19:37:41,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 936/12318 [1:36:50<19:37:41,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 937/12318 [1:36:59<19:38:03,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 937/12318 [1:36:59<19:38:03,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 938/12318 [1:37:01<19:37:04,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 938/12318 [1:37:01<19:37:04,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 939/12318 [1:37:09<19:37:27,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 939/12318 [1:37:09<19:37:27,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 940/12318 [1:37:17<19:37:38,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 940/12318 [1:37:17<19:37:38,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 941/12318 [1:37:18<19:36:32,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 941/12318 [1:37:18<19:36:32,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 942/12318 [1:37:24<19:36:18,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 942/12318 [1:37:24<19:36:18,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 943/12318 [1:37:29<19:35:57,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 943/12318 [1:37:29<19:35:57,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 944/12318 [1:37:35<19:35:55,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 944/12318 [1:37:35<19:35:55,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 945/12318 [1:37:41<19:35:40,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 945/12318 [1:37:41<19:35:40,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 946/12318 [1:37:48<19:35:51,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 946/12318 [1:37:48<19:35:51,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 947/12318 [1:37:57<19:36:14,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 947/12318 [1:37:57<19:36:14,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 948/12318 [1:38:00<19:35:34,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 948/12318 [1:38:00<19:35:34,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 949/12318 [1:38:04<19:35:01,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 949/12318 [1:38:04<19:35:01,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 950/12318 [1:38:09<19:34:35,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 950/12318 [1:38:09<19:34:35,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 951/12318 [1:38:16<19:34:35,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 951/12318 [1:38:16<19:34:35,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 952/12318 [1:38:21<19:34:15,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 952/12318 [1:38:21<19:34:15,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 953/12318 [1:38:25<19:33:50,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 953/12318 [1:38:25<19:33:50,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 954/12318 [1:38:28<19:33:05,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 954/12318 [1:38:28<19:33:05,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 955/12318 [1:38:33<19:32:38,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 955/12318 [1:38:33<19:32:38,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 956/12318 [1:38:40<19:32:50,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 956/12318 [1:38:40<19:32:50,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 957/12318 [1:38:46<19:32:35,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 957/12318 [1:38:46<19:32:35,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 958/12318 [1:38:49<19:31:50,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 958/12318 [1:38:49<19:31:50,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 959/12318 [1:38:54<19:31:30,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 959/12318 [1:38:54<19:31:30,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 960/12318 [1:39:20<19:35:23,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 960/12318 [1:39:20<19:35:23,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 961/12318 [1:39:28<19:35:35,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 961/12318 [1:39:28<19:35:35,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 962/12318 [1:39:34<19:35:21,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 962/12318 [1:39:34<19:35:21,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 963/12318 [1:39:36<19:34:35,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 963/12318 [1:39:36<19:34:35,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 964/12318 [1:39:43<19:34:33,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 964/12318 [1:39:43<19:34:33,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 965/12318 [1:39:45<19:33:41,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 965/12318 [1:39:45<19:33:41,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 966/12318 [1:39:53<19:33:51,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 966/12318 [1:39:53<19:33:51,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 967/12318 [1:39:56<19:33:11,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 967/12318 [1:39:56<19:33:11,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 968/12318 [1:40:05<19:33:32,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 968/12318 [1:40:05<19:33:32,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 969/12318 [1:40:11<19:33:30,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 969/12318 [1:40:11<19:33:30,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 970/12318 [1:40:20<19:33:53,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 970/12318 [1:40:20<19:33:53,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 971/12318 [1:40:22<19:33:01,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 971/12318 [1:40:22<19:33:01,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 972/12318 [1:40:29<19:32:59,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 972/12318 [1:40:29<19:32:59,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 973/12318 [1:40:35<19:32:58,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 973/12318 [1:40:35<19:32:58,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 974/12318 [1:40:42<19:32:56,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 974/12318 [1:40:42<19:32:56,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 975/12318 [1:40:50<19:33:08,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 975/12318 [1:40:50<19:33:08,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 976/12318 [1:40:53<19:32:29,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 976/12318 [1:40:53<19:32:29,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 977/12318 [1:40:57<19:31:57,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 977/12318 [1:40:57<19:31:57,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 978/12318 [1:41:06<19:32:19,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 978/12318 [1:41:06<19:32:19,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 979/12318 [1:41:15<19:32:43,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 979/12318 [1:41:15<19:32:43,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 980/12318 [1:41:22<19:32:54,  6.21s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 980/12318 [1:41:22<19:32:54,  6.21s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 981/12318 [1:41:24<19:31:51,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 981/12318 [1:41:24<19:31:51,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 982/12318 [1:41:26<19:30:55,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 982/12318 [1:41:26<19:30:55,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 983/12318 [1:41:34<19:31:19,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 983/12318 [1:41:34<19:31:19,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 984/12318 [1:41:38<19:30:47,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 984/12318 [1:41:38<19:30:47,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 985/12318 [1:41:43<19:30:28,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 985/12318 [1:41:43<19:30:28,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 986/12318 [1:41:46<19:29:43,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 986/12318 [1:41:46<19:29:43,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 987/12318 [1:41:51<19:29:18,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 987/12318 [1:41:51<19:29:18,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 988/12318 [1:41:59<19:29:41,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 988/12318 [1:41:59<19:29:41,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 989/12318 [1:42:04<19:29:15,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 989/12318 [1:42:04<19:29:15,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 990/12318 [1:42:07<19:28:37,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 990/12318 [1:42:07<19:28:37,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 991/12318 [1:42:11<19:28:05,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 991/12318 [1:42:11<19:28:05,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 992/12318 [1:42:33<19:30:51,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 992/12318 [1:42:33<19:30:51,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 993/12318 [1:42:37<19:30:26,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 993/12318 [1:42:37<19:30:26,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 994/12318 [1:42:43<19:30:13,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 994/12318 [1:42:43<19:30:13,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 995/12318 [1:42:44<19:29:11,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 995/12318 [1:42:44<19:29:11,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 996/12318 [1:42:53<19:29:34,  6.20s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 996/12318 [1:42:53<19:29:34,  6.20s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 997/12318 [1:42:56<19:28:50,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 997/12318 [1:42:56<19:28:50,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 998/12318 [1:42:57<19:27:49,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 998/12318 [1:42:57<19:27:49,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 999/12318 [1:43:02<19:27:35,  6.19s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:   8%| | 999/12318 [1:43:02<19:27:35,  6.19s/it, v_num=e4xv, train/loss="
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1000/12318 [1:43:04<19:26:33,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1000/12318 [1:43:04<19:26:33,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1001/12318 [1:43:09<19:26:14,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1001/12318 [1:43:09<19:26:14,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1002/12318 [1:43:14<19:25:54,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1002/12318 [1:43:14<19:25:54,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1003/12318 [1:43:15<19:24:54,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1003/12318 [1:43:15<19:24:54,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1004/12318 [1:43:21<19:24:40,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1004/12318 [1:43:21<19:24:40,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1005/12318 [1:43:26<19:24:27,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1005/12318 [1:43:26<19:24:27,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1006/12318 [1:43:34<19:24:37,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1006/12318 [1:43:34<19:24:37,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1007/12318 [1:43:35<19:23:37,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1007/12318 [1:43:35<19:23:37,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1008/12318 [1:43:44<19:23:59,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1008/12318 [1:43:44<19:23:59,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1009/12318 [1:43:53<19:24:20,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1009/12318 [1:43:53<19:24:20,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1010/12318 [1:43:59<19:24:18,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1010/12318 [1:43:59<19:24:18,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1011/12318 [1:44:02<19:23:35,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1011/12318 [1:44:02<19:23:35,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1012/12318 [1:44:04<19:22:47,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1012/12318 [1:44:04<19:22:47,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1013/12318 [1:44:09<19:22:28,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1013/12318 [1:44:09<19:22:28,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1014/12318 [1:44:14<19:22:04,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1014/12318 [1:44:14<19:22:04,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1015/12318 [1:44:16<19:21:09,  6.16s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1015/12318 [1:44:16<19:21:09,  6.16s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1016/12318 [1:44:23<19:21:20,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1016/12318 [1:44:23<19:21:20,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1017/12318 [1:44:32<19:21:44,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1017/12318 [1:44:32<19:21:44,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1018/12318 [1:44:38<19:21:32,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1018/12318 [1:44:38<19:21:32,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1019/12318 [1:44:44<19:21:19,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1019/12318 [1:44:44<19:21:19,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1020/12318 [1:44:52<19:21:43,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1020/12318 [1:44:52<19:21:43,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1021/12318 [1:44:59<19:21:41,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1021/12318 [1:44:59<19:21:41,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1022/12318 [1:45:04<19:21:27,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1022/12318 [1:45:04<19:21:27,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1023/12318 [1:45:09<19:21:08,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1023/12318 [1:45:09<19:21:08,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1024/12318 [1:45:54<19:28:08,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1024/12318 [1:45:54<19:28:08,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1025/12318 [1:45:59<19:27:50,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1025/12318 [1:45:59<19:27:50,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1026/12318 [1:46:05<19:27:39,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1026/12318 [1:46:05<19:27:39,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1027/12318 [1:46:09<19:27:08,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1027/12318 [1:46:09<19:27:08,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1028/12318 [1:46:18<19:27:29,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1028/12318 [1:46:18<19:27:29,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1029/12318 [1:46:23<19:27:16,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1029/12318 [1:46:23<19:27:16,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1030/12318 [1:46:32<19:27:38,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1030/12318 [1:46:32<19:27:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1031/12318 [1:46:37<19:27:19,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1031/12318 [1:46:37<19:27:19,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1032/12318 [1:46:39<19:26:19,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1032/12318 [1:46:39<19:26:19,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1033/12318 [1:46:46<19:26:30,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1033/12318 [1:46:46<19:26:30,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1034/12318 [1:46:48<19:25:36,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1034/12318 [1:46:48<19:25:36,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1035/12318 [1:46:55<19:25:34,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1035/12318 [1:46:55<19:25:34,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1036/12318 [1:46:56<19:24:35,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1036/12318 [1:46:56<19:24:35,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1037/12318 [1:47:04<19:24:44,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1037/12318 [1:47:04<19:24:44,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1038/12318 [1:47:10<19:24:42,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1038/12318 [1:47:10<19:24:42,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1039/12318 [1:47:12<19:23:49,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1039/12318 [1:47:12<19:23:49,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1040/12318 [1:47:15<19:23:07,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1040/12318 [1:47:15<19:23:07,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1041/12318 [1:47:24<19:23:29,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1041/12318 [1:47:24<19:23:29,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1042/12318 [1:47:32<19:23:50,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1042/12318 [1:47:32<19:23:50,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1043/12318 [1:47:39<19:23:49,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1043/12318 [1:47:39<19:23:49,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1044/12318 [1:47:46<19:23:48,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1044/12318 [1:47:46<19:23:48,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1045/12318 [1:47:49<19:23:06,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1045/12318 [1:47:49<19:23:06,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1046/12318 [1:47:56<19:23:16,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1046/12318 [1:47:56<19:23:16,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   8%| | 1047/12318 [1:47:59<19:22:34,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   8%| | 1047/12318 [1:47:59<19:22:34,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1048/12318 [1:48:02<19:21:47,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1048/12318 [1:48:02<19:21:47,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1049/12318 [1:48:09<19:21:57,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1049/12318 [1:48:09<19:21:57,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1050/12318 [1:48:18<19:22:17,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1050/12318 [1:48:18<19:22:17,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1051/12318 [1:48:26<19:22:27,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1051/12318 [1:48:26<19:22:27,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1052/12318 [1:48:29<19:21:52,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1052/12318 [1:48:29<19:21:52,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1053/12318 [1:48:33<19:21:23,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1053/12318 [1:48:33<19:21:23,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1054/12318 [1:48:41<19:21:34,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1054/12318 [1:48:41<19:21:34,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1055/12318 [1:48:44<19:20:59,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1055/12318 [1:48:44<19:20:59,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1056/12318 [1:49:03<19:23:08,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1056/12318 [1:49:03<19:23:08,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1057/12318 [1:49:07<19:22:38,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1057/12318 [1:49:07<19:22:38,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1058/12318 [1:49:10<19:21:51,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1058/12318 [1:49:10<19:21:51,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1059/12318 [1:49:17<19:22:01,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1059/12318 [1:49:17<19:22:01,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1060/12318 [1:49:26<19:22:23,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1060/12318 [1:49:26<19:22:23,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1061/12318 [1:49:35<19:22:43,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1061/12318 [1:49:35<19:22:43,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1062/12318 [1:49:40<19:22:24,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1062/12318 [1:49:40<19:22:24,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1063/12318 [1:49:42<19:21:32,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1063/12318 [1:49:42<19:21:32,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1064/12318 [1:49:47<19:21:19,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1064/12318 [1:49:47<19:21:19,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1065/12318 [1:49:51<19:20:43,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1065/12318 [1:49:51<19:20:43,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1066/12318 [1:49:58<19:20:51,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1066/12318 [1:49:58<19:20:51,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1067/12318 [1:50:05<19:20:49,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1067/12318 [1:50:05<19:20:49,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1068/12318 [1:50:10<19:20:31,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1068/12318 [1:50:10<19:20:31,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1069/12318 [1:50:13<19:19:55,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1069/12318 [1:50:13<19:19:55,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1070/12318 [1:50:18<19:19:32,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1070/12318 [1:50:18<19:19:32,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1071/12318 [1:50:21<19:18:57,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1071/12318 [1:50:21<19:18:57,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1072/12318 [1:50:26<19:18:33,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1072/12318 [1:50:26<19:18:33,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1073/12318 [1:50:33<19:18:42,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1073/12318 [1:50:33<19:18:42,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1074/12318 [1:50:38<19:18:19,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1074/12318 [1:50:38<19:18:19,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1075/12318 [1:50:47<19:18:39,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1075/12318 [1:50:47<19:18:39,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1076/12318 [1:50:54<19:18:50,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1076/12318 [1:50:54<19:18:50,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1077/12318 [1:50:59<19:18:27,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1077/12318 [1:50:59<19:18:27,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1078/12318 [1:51:06<19:18:25,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1078/12318 [1:51:06<19:18:25,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1079/12318 [1:51:12<19:18:23,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1079/12318 [1:51:12<19:18:23,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1080/12318 [1:51:17<19:18:05,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1080/12318 [1:51:17<19:18:05,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1081/12318 [1:51:22<19:17:47,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1081/12318 [1:51:22<19:17:47,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1082/12318 [1:51:25<19:17:07,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1082/12318 [1:51:25<19:17:07,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1083/12318 [1:51:32<19:17:06,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1083/12318 [1:51:32<19:17:06,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1084/12318 [1:51:38<19:17:03,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1084/12318 [1:51:38<19:17:03,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1085/12318 [1:51:40<19:16:12,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1085/12318 [1:51:40<19:16:12,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1086/12318 [1:51:44<19:15:42,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1086/12318 [1:51:44<19:15:42,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1087/12318 [1:51:51<19:15:40,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1087/12318 [1:51:51<19:15:40,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1088/12318 [1:52:19<19:19:23,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1088/12318 [1:52:19<19:19:23,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1089/12318 [1:52:26<19:19:20,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1089/12318 [1:52:26<19:19:20,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1090/12318 [1:52:33<19:19:28,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1090/12318 [1:52:33<19:19:28,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1091/12318 [1:52:42<19:19:49,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1091/12318 [1:52:42<19:19:49,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1092/12318 [1:52:51<19:20:09,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1092/12318 [1:52:51<19:20:09,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1093/12318 [1:52:56<19:19:56,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1093/12318 [1:52:56<19:19:56,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1094/12318 [1:52:59<19:19:16,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1094/12318 [1:52:59<19:19:16,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1095/12318 [1:53:08<19:19:34,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1095/12318 [1:53:08<19:19:34,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1096/12318 [1:53:15<19:19:43,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1096/12318 [1:53:15<19:19:43,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1097/12318 [1:53:21<19:19:31,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1097/12318 [1:53:21<19:19:31,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1098/12318 [1:53:22<19:18:35,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1098/12318 [1:53:22<19:18:35,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1099/12318 [1:53:24<19:17:39,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1099/12318 [1:53:24<19:17:39,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1100/12318 [1:53:25<19:16:44,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1100/12318 [1:53:25<19:16:44,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1101/12318 [1:53:30<19:16:27,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1101/12318 [1:53:30<19:16:27,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1102/12318 [1:53:32<19:15:31,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1102/12318 [1:53:32<19:15:31,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1103/12318 [1:53:40<19:15:52,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1103/12318 [1:53:40<19:15:52,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1104/12318 [1:53:49<19:16:12,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1104/12318 [1:53:49<19:16:12,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1105/12318 [1:53:53<19:15:38,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1105/12318 [1:53:53<19:15:38,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1106/12318 [1:53:59<19:15:37,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1106/12318 [1:53:59<19:15:37,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1107/12318 [1:54:04<19:15:14,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1107/12318 [1:54:04<19:15:14,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1108/12318 [1:54:10<19:15:13,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1108/12318 [1:54:10<19:15:13,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1109/12318 [1:54:14<19:14:39,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1109/12318 [1:54:14<19:14:39,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1110/12318 [1:54:19<19:14:22,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1110/12318 [1:54:19<19:14:22,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1111/12318 [1:54:22<19:13:48,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1111/12318 [1:54:22<19:13:48,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1112/12318 [1:54:28<19:13:36,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1112/12318 [1:54:28<19:13:36,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1113/12318 [1:54:34<19:13:23,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1113/12318 [1:54:34<19:13:23,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1114/12318 [1:54:35<19:12:28,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1114/12318 [1:54:35<19:12:28,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1115/12318 [1:54:42<19:12:37,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1115/12318 [1:54:42<19:12:37,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1116/12318 [1:54:51<19:12:57,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1116/12318 [1:54:51<19:12:57,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1117/12318 [1:54:53<19:12:02,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1117/12318 [1:54:53<19:12:02,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1118/12318 [1:54:54<19:11:12,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1118/12318 [1:54:54<19:11:12,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1119/12318 [1:55:03<19:11:33,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1119/12318 [1:55:03<19:11:33,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1120/12318 [1:55:26<19:14:09,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1120/12318 [1:55:26<19:14:09,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1121/12318 [1:55:32<19:14:07,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1121/12318 [1:55:32<19:14:07,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1122/12318 [1:55:39<19:14:05,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1122/12318 [1:55:39<19:14:05,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1123/12318 [1:55:44<19:13:47,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1123/12318 [1:55:44<19:13:47,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1124/12318 [1:55:51<19:13:46,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1124/12318 [1:55:51<19:13:46,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1125/12318 [1:55:57<19:13:45,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1125/12318 [1:55:57<19:13:45,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1126/12318 [1:56:00<19:13:06,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1126/12318 [1:56:00<19:13:06,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1127/12318 [1:56:09<19:13:26,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1127/12318 [1:56:09<19:13:26,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1128/12318 [1:56:14<19:13:08,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1128/12318 [1:56:14<19:13:08,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1129/12318 [1:56:23<19:13:27,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1129/12318 [1:56:23<19:13:27,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1130/12318 [1:56:26<19:12:49,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1130/12318 [1:56:26<19:12:49,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1131/12318 [1:56:32<19:12:46,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1131/12318 [1:56:32<19:12:46,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1132/12318 [1:56:36<19:12:13,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1132/12318 [1:56:36<19:12:13,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1133/12318 [1:56:41<19:12:01,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1133/12318 [1:56:41<19:12:01,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1134/12318 [1:56:47<19:11:49,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1134/12318 [1:56:47<19:11:49,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1135/12318 [1:56:56<19:12:08,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1135/12318 [1:56:56<19:12:08,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1136/12318 [1:56:59<19:11:30,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1136/12318 [1:56:59<19:11:30,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1137/12318 [1:57:04<19:11:13,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1137/12318 [1:57:04<19:11:13,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1138/12318 [1:57:12<19:11:32,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1138/12318 [1:57:12<19:11:32,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1139/12318 [1:57:21<19:11:51,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1139/12318 [1:57:21<19:11:51,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1140/12318 [1:57:26<19:11:33,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1140/12318 [1:57:26<19:11:33,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1141/12318 [1:57:27<19:10:40,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1141/12318 [1:57:27<19:10:40,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1142/12318 [1:57:29<19:09:52,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1142/12318 [1:57:29<19:09:52,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1143/12318 [1:57:31<19:09:03,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1143/12318 [1:57:31<19:09:03,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1144/12318 [1:57:35<19:08:36,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1144/12318 [1:57:35<19:08:36,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1145/12318 [1:57:42<19:08:34,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1145/12318 [1:57:42<19:08:34,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1146/12318 [1:57:51<19:08:53,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1146/12318 [1:57:51<19:08:53,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1147/12318 [1:57:53<19:08:14,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1147/12318 [1:57:53<19:08:14,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1148/12318 [1:57:56<19:07:37,  6.16s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1148/12318 [1:57:56<19:07:37,  6.16s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1149/12318 [1:58:00<19:07:09,  6.16s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1149/12318 [1:58:00<19:07:09,  6.16s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1150/12318 [1:58:07<19:07:07,  6.16s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1150/12318 [1:58:07<19:07:07,  6.16s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1151/12318 [1:58:14<19:07:14,  6.16s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1151/12318 [1:58:14<19:07:14,  6.16s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1152/12318 [1:58:58<19:13:08,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1152/12318 [1:58:58<19:13:08,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1153/12318 [1:59:04<19:13:06,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1153/12318 [1:59:04<19:13:06,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1154/12318 [1:59:13<19:13:25,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1154/12318 [1:59:13<19:13:25,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1155/12318 [1:59:19<19:13:13,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1155/12318 [1:59:19<19:13:13,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1156/12318 [1:59:26<19:13:20,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1156/12318 [1:59:26<19:13:20,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1157/12318 [1:59:32<19:13:09,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1157/12318 [1:59:32<19:13:09,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1158/12318 [1:59:39<19:13:07,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1158/12318 [1:59:39<19:13:07,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1159/12318 [1:59:47<19:13:25,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1159/12318 [1:59:47<19:13:25,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1160/12318 [1:59:52<19:13:08,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1160/12318 [1:59:52<19:13:08,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1161/12318 [1:59:59<19:13:07,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1161/12318 [1:59:59<19:13:07,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1162/12318 [2:00:06<19:13:05,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1162/12318 [2:00:06<19:13:05,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1163/12318 [2:00:09<19:12:26,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1163/12318 [2:00:09<19:12:26,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1164/12318 [2:00:16<19:12:34,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1164/12318 [2:00:16<19:12:34,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1165/12318 [2:00:23<19:12:32,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1165/12318 [2:00:23<19:12:32,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1166/12318 [2:00:25<19:11:45,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1166/12318 [2:00:25<19:11:45,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1167/12318 [2:00:32<19:11:52,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1167/12318 [2:00:32<19:11:52,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1168/12318 [2:00:36<19:11:25,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1168/12318 [2:00:36<19:11:25,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1169/12318 [2:00:41<19:11:02,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1169/12318 [2:00:41<19:11:02,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:   9%| | 1170/12318 [2:00:49<19:11:10,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:   9%| | 1170/12318 [2:00:49<19:11:10,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1171/12318 [2:00:53<19:10:48,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1171/12318 [2:00:53<19:10:48,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1172/12318 [2:00:57<19:10:20,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1172/12318 [2:00:57<19:10:20,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1173/12318 [2:01:05<19:10:28,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1173/12318 [2:01:05<19:10:28,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1174/12318 [2:01:11<19:10:26,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1174/12318 [2:01:11<19:10:26,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1175/12318 [2:01:20<19:10:43,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1175/12318 [2:01:20<19:10:43,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1176/12318 [2:01:29<19:11:01,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1176/12318 [2:01:29<19:11:01,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1177/12318 [2:01:34<19:10:48,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1177/12318 [2:01:34<19:10:48,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1178/12318 [2:01:41<19:10:46,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1178/12318 [2:01:41<19:10:46,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1179/12318 [2:01:48<19:10:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1179/12318 [2:01:48<19:10:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1180/12318 [2:01:57<19:11:06,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1180/12318 [2:01:57<19:11:06,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1181/12318 [2:02:00<19:10:33,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1181/12318 [2:02:00<19:10:33,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1182/12318 [2:02:04<19:10:08,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1182/12318 [2:02:04<19:10:08,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1183/12318 [2:02:09<19:09:51,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1183/12318 [2:02:09<19:09:51,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1184/12318 [2:02:28<19:11:44,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1184/12318 [2:02:28<19:11:44,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1185/12318 [2:02:35<19:11:42,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1185/12318 [2:02:35<19:11:42,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1186/12318 [2:02:38<19:11:04,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1186/12318 [2:02:38<19:11:04,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1187/12318 [2:02:42<19:10:37,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1187/12318 [2:02:42<19:10:37,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1188/12318 [2:02:43<19:09:45,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1188/12318 [2:02:43<19:09:45,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1189/12318 [2:02:50<19:09:43,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1189/12318 [2:02:50<19:09:43,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1190/12318 [2:02:55<19:09:27,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1190/12318 [2:02:55<19:09:27,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1191/12318 [2:03:00<19:09:15,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1191/12318 [2:03:00<19:09:15,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1192/12318 [2:03:06<19:09:02,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1192/12318 [2:03:06<19:09:02,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1193/12318 [2:03:11<19:08:45,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1193/12318 [2:03:11<19:08:45,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1194/12318 [2:03:16<19:08:34,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1194/12318 [2:03:16<19:08:34,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1195/12318 [2:03:21<19:08:12,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1195/12318 [2:03:21<19:08:12,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1196/12318 [2:03:28<19:08:10,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1196/12318 [2:03:28<19:08:10,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1197/12318 [2:03:35<19:08:17,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1197/12318 [2:03:35<19:08:17,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1198/12318 [2:03:39<19:07:50,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1198/12318 [2:03:39<19:07:50,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1199/12318 [2:03:41<19:06:59,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1199/12318 [2:03:41<19:06:59,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1200/12318 [2:03:46<19:06:46,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1200/12318 [2:03:46<19:06:46,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1201/12318 [2:03:55<19:07:03,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1201/12318 [2:03:55<19:07:03,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1202/12318 [2:03:58<19:06:26,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1202/12318 [2:03:58<19:06:26,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1203/12318 [2:04:03<19:06:10,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1203/12318 [2:04:03<19:06:10,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1204/12318 [2:04:08<19:05:57,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1204/12318 [2:04:08<19:05:57,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1205/12318 [2:04:17<19:06:14,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1205/12318 [2:04:17<19:06:14,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1206/12318 [2:04:21<19:05:53,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1206/12318 [2:04:21<19:05:53,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1207/12318 [2:04:29<19:06:00,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1207/12318 [2:04:29<19:06:00,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1208/12318 [2:04:38<19:06:18,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1208/12318 [2:04:38<19:06:18,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1209/12318 [2:04:44<19:06:15,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1209/12318 [2:04:44<19:06:15,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1210/12318 [2:04:47<19:05:34,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1210/12318 [2:04:47<19:05:34,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1211/12318 [2:04:56<19:05:51,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1211/12318 [2:04:56<19:05:51,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1212/12318 [2:05:04<19:06:08,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1212/12318 [2:05:04<19:06:08,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1213/12318 [2:05:09<19:05:51,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1213/12318 [2:05:09<19:05:51,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1214/12318 [2:05:16<19:05:48,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1214/12318 [2:05:16<19:05:48,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1215/12318 [2:05:22<19:05:44,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1215/12318 [2:05:22<19:05:44,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1216/12318 [2:05:40<19:07:25,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1216/12318 [2:05:40<19:07:25,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1217/12318 [2:05:48<19:07:35,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1217/12318 [2:05:48<19:07:35,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1218/12318 [2:05:57<19:07:52,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1218/12318 [2:05:57<19:07:52,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1219/12318 [2:06:00<19:07:16,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1219/12318 [2:06:00<19:07:16,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1220/12318 [2:06:07<19:07:22,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1220/12318 [2:06:07<19:07:22,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1221/12318 [2:06:14<19:07:19,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1221/12318 [2:06:14<19:07:19,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1222/12318 [2:06:18<19:06:57,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1222/12318 [2:06:18<19:06:57,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1223/12318 [2:06:22<19:06:30,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1223/12318 [2:06:22<19:06:30,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1224/12318 [2:06:27<19:06:12,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1224/12318 [2:06:27<19:06:12,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1225/12318 [2:06:33<19:06:00,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1225/12318 [2:06:33<19:06:00,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1226/12318 [2:06:38<19:05:47,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1226/12318 [2:06:38<19:05:47,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1227/12318 [2:06:45<19:05:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1227/12318 [2:06:45<19:05:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1228/12318 [2:06:50<19:05:32,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1228/12318 [2:06:50<19:05:32,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1229/12318 [2:06:59<19:05:49,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1229/12318 [2:06:59<19:05:49,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1230/12318 [2:07:04<19:05:33,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1230/12318 [2:07:04<19:05:33,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1231/12318 [2:07:13<19:05:49,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1231/12318 [2:07:13<19:05:49,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1232/12318 [2:07:20<19:05:48,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1232/12318 [2:07:20<19:05:48,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1233/12318 [2:07:25<19:05:31,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1233/12318 [2:07:25<19:05:31,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1234/12318 [2:07:33<19:05:48,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1234/12318 [2:07:33<19:05:48,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1235/12318 [2:07:39<19:05:36,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1235/12318 [2:07:39<19:05:36,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1236/12318 [2:07:40<19:04:46,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1236/12318 [2:07:40<19:04:46,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1237/12318 [2:07:47<19:04:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1237/12318 [2:07:47<19:04:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1238/12318 [2:07:55<19:04:52,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1238/12318 [2:07:55<19:04:52,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1239/12318 [2:07:57<19:04:07,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1239/12318 [2:07:57<19:04:07,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1240/12318 [2:08:03<19:04:05,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1240/12318 [2:08:03<19:04:05,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1241/12318 [2:08:05<19:03:20,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1241/12318 [2:08:05<19:03:20,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1242/12318 [2:08:10<19:03:00,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1242/12318 [2:08:10<19:03:00,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1243/12318 [2:08:17<19:03:08,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1243/12318 [2:08:17<19:03:08,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1244/12318 [2:08:25<19:03:15,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1244/12318 [2:08:25<19:03:15,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1245/12318 [2:08:26<19:02:25,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1245/12318 [2:08:26<19:02:25,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1246/12318 [2:08:32<19:02:13,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1246/12318 [2:08:32<19:02:13,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1247/12318 [2:08:35<19:01:38,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1247/12318 [2:08:35<19:01:38,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1248/12318 [2:08:49<19:02:38,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1248/12318 [2:08:49<19:02:38,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1249/12318 [2:08:54<19:02:26,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1249/12318 [2:08:54<19:02:26,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1250/12318 [2:09:03<19:02:42,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1250/12318 [2:09:03<19:02:42,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1251/12318 [2:09:04<19:01:53,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1251/12318 [2:09:04<19:01:53,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1252/12318 [2:09:09<19:01:36,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1252/12318 [2:09:09<19:01:36,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1253/12318 [2:09:12<19:00:57,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1253/12318 [2:09:12<19:00:57,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1254/12318 [2:09:14<19:00:21,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1254/12318 [2:09:14<19:00:21,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1255/12318 [2:09:23<19:00:36,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1255/12318 [2:09:23<19:00:36,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1256/12318 [2:09:29<19:00:24,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1256/12318 [2:09:29<19:00:24,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1257/12318 [2:09:36<19:00:30,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1257/12318 [2:09:36<19:00:30,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1258/12318 [2:09:42<19:00:17,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1258/12318 [2:09:42<19:00:17,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1259/12318 [2:09:48<19:00:14,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1259/12318 [2:09:48<19:00:14,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1260/12318 [2:09:54<19:00:02,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1260/12318 [2:09:54<19:00:02,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1261/12318 [2:10:01<19:00:08,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1261/12318 [2:10:01<19:00:08,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1262/12318 [2:10:05<18:59:38,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1262/12318 [2:10:05<18:59:38,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1263/12318 [2:10:09<18:59:13,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1263/12318 [2:10:09<18:59:13,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1264/12318 [2:10:16<18:59:20,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1264/12318 [2:10:16<18:59:20,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1265/12318 [2:10:24<18:59:28,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1265/12318 [2:10:24<18:59:28,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1266/12318 [2:10:27<18:58:54,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1266/12318 [2:10:27<18:58:54,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1267/12318 [2:10:34<18:58:51,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1267/12318 [2:10:34<18:58:51,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1268/12318 [2:10:37<18:58:21,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1268/12318 [2:10:37<18:58:21,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1269/12318 [2:10:45<18:58:28,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1269/12318 [2:10:45<18:58:28,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1270/12318 [2:10:50<18:58:17,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1270/12318 [2:10:50<18:58:17,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1271/12318 [2:10:56<18:58:06,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1271/12318 [2:10:56<18:58:06,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1272/12318 [2:11:01<18:57:45,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1272/12318 [2:11:01<18:57:45,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1273/12318 [2:11:07<18:57:42,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1273/12318 [2:11:07<18:57:42,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1274/12318 [2:11:15<18:57:49,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1274/12318 [2:11:15<18:57:49,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1275/12318 [2:11:20<18:57:37,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1275/12318 [2:11:20<18:57:37,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1276/12318 [2:11:26<18:57:26,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1276/12318 [2:11:26<18:57:26,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1277/12318 [2:11:33<18:57:23,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1277/12318 [2:11:33<18:57:23,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1278/12318 [2:11:38<18:57:12,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1278/12318 [2:11:38<18:57:12,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1279/12318 [2:11:41<18:56:38,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1279/12318 [2:11:41<18:56:38,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1280/12318 [2:12:06<18:59:15,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1280/12318 [2:12:06<18:59:15,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1281/12318 [2:12:11<18:58:55,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1281/12318 [2:12:11<18:58:55,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1282/12318 [2:12:15<18:58:31,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1282/12318 [2:12:15<18:58:31,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1283/12318 [2:12:20<18:58:14,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1283/12318 [2:12:20<18:58:14,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1284/12318 [2:12:23<18:57:45,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1284/12318 [2:12:23<18:57:45,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1285/12318 [2:12:29<18:57:34,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1285/12318 [2:12:29<18:57:34,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1286/12318 [2:12:32<18:57:04,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1286/12318 [2:12:32<18:57:04,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1287/12318 [2:12:37<18:56:40,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1287/12318 [2:12:37<18:56:40,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1288/12318 [2:12:38<18:55:52,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1288/12318 [2:12:38<18:55:52,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1289/12318 [2:12:43<18:55:41,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1289/12318 [2:12:43<18:55:41,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1290/12318 [2:12:46<18:55:02,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1290/12318 [2:12:46<18:55:02,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1291/12318 [2:12:53<18:55:01,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1291/12318 [2:12:53<18:55:01,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1292/12318 [2:12:58<18:54:49,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1292/12318 [2:12:58<18:54:49,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  10%| | 1293/12318 [2:13:07<18:55:06,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  10%| | 1293/12318 [2:13:07<18:55:06,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1294/12318 [2:13:16<18:55:21,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1294/12318 [2:13:16<18:55:21,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1295/12318 [2:13:22<18:55:19,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1295/12318 [2:13:22<18:55:19,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1296/12318 [2:13:31<18:55:35,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1296/12318 [2:13:31<18:55:35,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1297/12318 [2:13:33<18:54:52,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1297/12318 [2:13:33<18:54:52,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1298/12318 [2:13:40<18:54:52,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1298/12318 [2:13:40<18:54:52,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1299/12318 [2:13:42<18:54:09,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1299/12318 [2:13:42<18:54:09,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1300/12318 [2:13:46<18:53:44,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1300/12318 [2:13:46<18:53:44,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1301/12318 [2:13:52<18:53:42,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1301/12318 [2:13:52<18:53:42,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1302/12318 [2:13:57<18:53:21,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1302/12318 [2:13:57<18:53:21,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1303/12318 [2:13:58<18:52:34,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1303/12318 [2:13:58<18:52:34,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1304/12318 [2:14:03<18:52:19,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1304/12318 [2:14:03<18:52:19,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1305/12318 [2:14:08<18:52:03,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1305/12318 [2:14:08<18:52:03,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1306/12318 [2:14:16<18:52:09,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1306/12318 [2:14:16<18:52:09,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1307/12318 [2:14:24<18:52:17,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1307/12318 [2:14:24<18:52:17,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1308/12318 [2:14:26<18:51:43,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1308/12318 [2:14:27<18:51:43,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1309/12318 [2:14:29<18:51:09,  6.16s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1309/12318 [2:14:29<18:51:09,  6.16s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1310/12318 [2:14:38<18:51:24,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1310/12318 [2:14:38<18:51:24,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1311/12318 [2:14:42<18:50:59,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1311/12318 [2:14:42<18:50:59,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1312/12318 [2:15:31<18:56:55,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1312/12318 [2:15:31<18:56:55,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1313/12318 [2:15:38<18:56:53,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1313/12318 [2:15:38<18:56:53,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1314/12318 [2:15:43<18:56:38,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1314/12318 [2:15:43<18:56:38,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1315/12318 [2:15:49<18:56:27,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1315/12318 [2:15:49<18:56:27,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1316/12318 [2:15:57<18:56:41,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1316/12318 [2:15:57<18:56:41,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1317/12318 [2:15:59<18:55:55,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1317/12318 [2:15:59<18:55:55,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1318/12318 [2:16:07<18:56:02,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1318/12318 [2:16:07<18:56:02,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1319/12318 [2:16:09<18:55:20,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1319/12318 [2:16:09<18:55:20,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1320/12318 [2:16:14<18:55:04,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1320/12318 [2:16:14<18:55:04,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1321/12318 [2:16:15<18:54:22,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1321/12318 [2:16:15<18:54:22,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1322/12318 [2:16:19<18:53:53,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1322/12318 [2:16:19<18:53:53,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1323/12318 [2:16:27<18:54:00,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1323/12318 [2:16:27<18:54:00,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1324/12318 [2:16:34<18:54:06,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1324/12318 [2:16:34<18:54:06,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1325/12318 [2:16:39<18:53:50,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1325/12318 [2:16:39<18:53:50,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1326/12318 [2:16:48<18:54:05,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1326/12318 [2:16:48<18:54:05,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1327/12318 [2:16:50<18:53:27,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1327/12318 [2:16:50<18:53:27,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1328/12318 [2:16:56<18:53:15,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1328/12318 [2:16:56<18:53:15,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1329/12318 [2:16:59<18:52:42,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1329/12318 [2:16:59<18:52:42,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1330/12318 [2:17:05<18:52:40,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1330/12318 [2:17:05<18:52:40,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1331/12318 [2:17:07<18:51:53,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1331/12318 [2:17:07<18:51:53,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1332/12318 [2:17:15<18:52:07,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1332/12318 [2:17:15<18:52:07,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1333/12318 [2:17:22<18:52:05,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1333/12318 [2:17:22<18:52:05,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1334/12318 [2:17:24<18:51:23,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1334/12318 [2:17:24<18:51:23,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1335/12318 [2:17:25<18:50:37,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1335/12318 [2:17:25<18:50:37,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1336/12318 [2:17:31<18:50:26,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1336/12318 [2:17:31<18:50:26,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1337/12318 [2:17:35<18:50:01,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1337/12318 [2:17:35<18:50:01,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1338/12318 [2:17:39<18:49:42,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1338/12318 [2:17:39<18:49:42,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1339/12318 [2:17:43<18:49:17,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1339/12318 [2:17:43<18:49:17,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1340/12318 [2:17:47<18:48:49,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1340/12318 [2:17:47<18:48:49,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1341/12318 [2:17:52<18:48:38,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1341/12318 [2:17:52<18:48:38,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1342/12318 [2:17:54<18:47:52,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1342/12318 [2:17:54<18:47:52,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1343/12318 [2:17:59<18:47:42,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1343/12318 [2:17:59<18:47:42,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1344/12318 [2:18:56<18:54:29,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1344/12318 [2:18:56<18:54:29,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1345/12318 [2:19:05<18:54:43,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1345/12318 [2:19:05<18:54:43,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1346/12318 [2:19:06<18:53:57,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1346/12318 [2:19:06<18:53:57,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1347/12318 [2:19:10<18:53:29,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1347/12318 [2:19:10<18:53:29,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1348/12318 [2:19:17<18:53:36,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1348/12318 [2:19:17<18:53:36,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1349/12318 [2:19:24<18:53:33,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1349/12318 [2:19:24<18:53:33,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1350/12318 [2:19:33<18:53:47,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1350/12318 [2:19:33<18:53:47,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1351/12318 [2:19:38<18:53:31,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1351/12318 [2:19:38<18:53:31,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1352/12318 [2:19:46<18:53:43,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1352/12318 [2:19:46<18:53:43,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1353/12318 [2:19:55<18:53:57,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1353/12318 [2:19:55<18:53:57,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1354/12318 [2:20:00<18:53:41,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1354/12318 [2:20:00<18:53:41,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1355/12318 [2:20:06<18:53:38,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1355/12318 [2:20:06<18:53:38,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1356/12318 [2:20:10<18:53:13,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1356/12318 [2:20:10<18:53:13,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1357/12318 [2:20:17<18:53:09,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1357/12318 [2:20:17<18:53:09,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1358/12318 [2:20:25<18:53:22,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1358/12318 [2:20:25<18:53:22,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1359/12318 [2:20:27<18:52:41,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1359/12318 [2:20:27<18:52:41,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1360/12318 [2:20:34<18:52:38,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1360/12318 [2:20:34<18:52:38,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1361/12318 [2:20:37<18:52:05,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1361/12318 [2:20:37<18:52:05,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1362/12318 [2:20:40<18:51:36,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1362/12318 [2:20:40<18:51:36,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1363/12318 [2:20:48<18:51:40,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1363/12318 [2:20:48<18:51:40,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1364/12318 [2:20:54<18:51:37,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1364/12318 [2:20:54<18:51:37,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1365/12318 [2:20:56<18:50:55,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1365/12318 [2:20:56<18:50:55,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1366/12318 [2:21:01<18:50:44,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1366/12318 [2:21:01<18:50:44,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1367/12318 [2:21:09<18:50:49,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1367/12318 [2:21:09<18:50:49,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1368/12318 [2:21:15<18:50:37,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1368/12318 [2:21:15<18:50:37,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1369/12318 [2:21:23<18:50:50,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1369/12318 [2:21:23<18:50:50,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1370/12318 [2:21:26<18:50:17,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1370/12318 [2:21:26<18:50:17,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1371/12318 [2:21:32<18:50:07,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1371/12318 [2:21:32<18:50:07,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1372/12318 [2:21:35<18:49:35,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1372/12318 [2:21:35<18:49:35,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1373/12318 [2:21:36<18:48:50,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1373/12318 [2:21:36<18:48:50,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1374/12318 [2:21:44<18:48:55,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1374/12318 [2:21:44<18:48:55,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1375/12318 [2:21:51<18:49:02,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1375/12318 [2:21:51<18:49:02,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1376/12318 [2:22:25<18:52:31,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1376/12318 [2:22:25<18:52:31,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1377/12318 [2:22:31<18:52:29,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1377/12318 [2:22:31<18:52:29,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1378/12318 [2:22:36<18:52:12,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1378/12318 [2:22:36<18:52:12,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1379/12318 [2:22:43<18:52:09,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1379/12318 [2:22:43<18:52:09,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1380/12318 [2:22:48<18:51:53,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1380/12318 [2:22:48<18:51:53,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1381/12318 [2:22:51<18:51:24,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1381/12318 [2:22:51<18:51:24,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1382/12318 [2:22:58<18:51:21,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1382/12318 [2:22:58<18:51:21,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1383/12318 [2:23:03<18:51:05,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1383/12318 [2:23:03<18:51:05,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1384/12318 [2:23:07<18:50:45,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1384/12318 [2:23:07<18:50:45,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1385/12318 [2:23:16<18:50:59,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1385/12318 [2:23:16<18:50:59,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1386/12318 [2:23:25<18:51:14,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1386/12318 [2:23:25<18:51:14,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1387/12318 [2:23:33<18:51:20,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1387/12318 [2:23:33<18:51:20,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1388/12318 [2:23:36<18:50:48,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1388/12318 [2:23:36<18:50:48,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1389/12318 [2:23:43<18:50:53,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1389/12318 [2:23:43<18:50:53,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1390/12318 [2:23:47<18:50:25,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1390/12318 [2:23:47<18:50:25,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1391/12318 [2:23:50<18:49:57,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1391/12318 [2:23:50<18:49:57,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1392/12318 [2:23:56<18:49:47,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1392/12318 [2:23:56<18:49:47,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1393/12318 [2:23:57<18:49:02,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1393/12318 [2:23:57<18:49:02,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1394/12318 [2:24:01<18:48:39,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1394/12318 [2:24:01<18:48:39,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1395/12318 [2:24:08<18:48:36,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1395/12318 [2:24:08<18:48:36,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1396/12318 [2:24:13<18:48:24,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1396/12318 [2:24:13<18:48:24,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1397/12318 [2:24:20<18:48:22,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1397/12318 [2:24:20<18:48:22,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1398/12318 [2:24:24<18:47:59,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1398/12318 [2:24:24<18:47:59,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1399/12318 [2:24:27<18:47:31,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1399/12318 [2:24:27<18:47:31,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1400/12318 [2:24:36<18:47:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1400/12318 [2:24:36<18:47:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1401/12318 [2:24:45<18:47:57,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1401/12318 [2:24:45<18:47:57,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1402/12318 [2:24:50<18:47:42,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1402/12318 [2:24:50<18:47:42,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1403/12318 [2:24:53<18:47:10,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1403/12318 [2:24:53<18:47:10,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1404/12318 [2:25:00<18:47:16,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1404/12318 [2:25:00<18:47:16,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1405/12318 [2:25:05<18:46:56,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1405/12318 [2:25:05<18:46:56,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1406/12318 [2:25:09<18:46:32,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1406/12318 [2:25:09<18:46:32,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1407/12318 [2:25:11<18:45:56,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1407/12318 [2:25:11<18:45:56,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1408/12318 [2:25:39<18:48:40,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1408/12318 [2:25:39<18:48:40,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1409/12318 [2:25:45<18:48:29,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1409/12318 [2:25:45<18:48:29,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1410/12318 [2:25:48<18:48:01,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1410/12318 [2:25:48<18:48:01,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1411/12318 [2:25:51<18:47:25,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1411/12318 [2:25:51<18:47:25,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1412/12318 [2:25:57<18:47:23,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1412/12318 [2:25:57<18:47:23,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1413/12318 [2:25:59<18:46:43,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1413/12318 [2:25:59<18:46:43,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1414/12318 [2:26:02<18:46:12,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1414/12318 [2:26:02<18:46:12,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1415/12318 [2:26:06<18:45:48,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1415/12318 [2:26:06<18:45:48,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  11%| | 1416/12318 [2:26:10<18:45:21,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  11%| | 1416/12318 [2:26:10<18:45:21,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1417/12318 [2:26:14<18:45:02,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1417/12318 [2:26:14<18:45:02,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1418/12318 [2:26:23<18:45:16,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1418/12318 [2:26:23<18:45:16,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1419/12318 [2:26:26<18:44:44,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1419/12318 [2:26:26<18:44:44,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1420/12318 [2:26:33<18:44:50,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1420/12318 [2:26:33<18:44:50,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1421/12318 [2:26:41<18:44:55,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1421/12318 [2:26:41<18:44:55,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1422/12318 [2:26:46<18:44:36,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1422/12318 [2:26:46<18:44:36,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1423/12318 [2:26:51<18:44:25,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1423/12318 [2:26:51<18:44:25,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1424/12318 [2:26:55<18:44:02,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1424/12318 [2:26:55<18:44:02,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1425/12318 [2:27:00<18:43:43,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1425/12318 [2:27:00<18:43:43,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1426/12318 [2:27:08<18:43:56,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1426/12318 [2:27:08<18:43:56,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1427/12318 [2:27:15<18:43:53,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1427/12318 [2:27:15<18:43:53,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1428/12318 [2:27:21<18:43:42,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1428/12318 [2:27:21<18:43:42,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1429/12318 [2:27:27<18:43:39,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1429/12318 [2:27:27<18:43:39,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1430/12318 [2:27:35<18:43:44,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1430/12318 [2:27:35<18:43:44,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1431/12318 [2:27:43<18:43:49,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1431/12318 [2:27:43<18:43:49,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1432/12318 [2:27:45<18:43:18,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1432/12318 [2:27:45<18:43:18,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1433/12318 [2:27:53<18:43:23,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1433/12318 [2:27:53<18:43:24,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1434/12318 [2:27:55<18:42:40,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1434/12318 [2:27:55<18:42:40,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1435/12318 [2:27:59<18:42:22,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1435/12318 [2:27:59<18:42:22,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1436/12318 [2:28:08<18:42:35,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1436/12318 [2:28:08<18:42:35,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1437/12318 [2:28:12<18:42:12,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1437/12318 [2:28:12<18:42:12,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1438/12318 [2:28:21<18:42:26,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1438/12318 [2:28:21<18:42:26,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1439/12318 [2:28:26<18:42:15,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1439/12318 [2:28:26<18:42:15,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1440/12318 [2:29:00<18:45:40,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1440/12318 [2:29:00<18:45:40,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1441/12318 [2:29:04<18:45:13,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1441/12318 [2:29:04<18:45:13,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1442/12318 [2:29:08<18:44:54,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1442/12318 [2:29:08<18:44:54,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1443/12318 [2:29:12<18:44:31,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1443/12318 [2:29:12<18:44:31,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1444/12318 [2:29:16<18:44:04,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1444/12318 [2:29:16<18:44:04,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1445/12318 [2:29:19<18:43:38,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1445/12318 [2:29:19<18:43:38,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1446/12318 [2:29:21<18:42:59,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1446/12318 [2:29:21<18:42:59,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1447/12318 [2:29:25<18:42:36,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1447/12318 [2:29:25<18:42:36,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1448/12318 [2:29:27<18:41:57,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1448/12318 [2:29:27<18:41:57,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1449/12318 [2:29:30<18:41:27,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1449/12318 [2:29:30<18:41:27,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1450/12318 [2:29:37<18:41:25,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1450/12318 [2:29:37<18:41:25,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1451/12318 [2:29:43<18:41:22,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1451/12318 [2:29:43<18:41:22,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1452/12318 [2:29:48<18:41:03,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1452/12318 [2:29:48<18:41:03,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1453/12318 [2:29:53<18:40:48,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1453/12318 [2:29:53<18:40:48,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1454/12318 [2:29:58<18:40:38,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1454/12318 [2:29:58<18:40:38,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1455/12318 [2:30:02<18:40:15,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1455/12318 [2:30:02<18:40:15,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1456/12318 [2:30:11<18:40:28,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1456/12318 [2:30:11<18:40:28,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1457/12318 [2:30:13<18:39:49,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1457/12318 [2:30:13<18:39:49,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1458/12318 [2:30:14<18:39:06,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1458/12318 [2:30:14<18:39:06,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1459/12318 [2:30:17<18:38:32,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1459/12318 [2:30:17<18:38:32,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1460/12318 [2:30:19<18:37:57,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1460/12318 [2:30:19<18:37:57,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1461/12318 [2:30:27<18:38:01,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1461/12318 [2:30:27<18:38:01,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1462/12318 [2:30:30<18:37:35,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1462/12318 [2:30:30<18:37:35,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1463/12318 [2:30:39<18:37:47,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1463/12318 [2:30:39<18:37:47,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1464/12318 [2:30:45<18:37:43,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1464/12318 [2:30:45<18:37:43,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1465/12318 [2:30:49<18:37:20,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1465/12318 [2:30:49<18:37:20,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1466/12318 [2:30:54<18:37:06,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1466/12318 [2:30:54<18:37:06,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1467/12318 [2:30:56<18:36:27,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1467/12318 [2:30:56<18:36:27,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1468/12318 [2:31:00<18:36:05,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1468/12318 [2:31:00<18:36:05,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1469/12318 [2:31:09<18:36:17,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1469/12318 [2:31:09<18:36:17,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1470/12318 [2:31:13<18:35:55,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1470/12318 [2:31:13<18:35:55,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1471/12318 [2:31:20<18:35:59,  6.17s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1471/12318 [2:31:20<18:35:59,  6.17s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1472/12318 [2:32:22<18:42:43,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1472/12318 [2:32:22<18:42:43,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1473/12318 [2:32:26<18:42:20,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1473/12318 [2:32:26<18:42:20,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1474/12318 [2:32:31<18:42:05,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1474/12318 [2:32:31<18:42:05,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1475/12318 [2:32:36<18:41:49,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1475/12318 [2:32:36<18:41:49,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1476/12318 [2:32:40<18:41:27,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1476/12318 [2:32:40<18:41:27,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1477/12318 [2:32:45<18:41:12,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1477/12318 [2:32:45<18:41:12,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1478/12318 [2:32:50<18:41:00,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1478/12318 [2:32:50<18:41:00,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1479/12318 [2:32:59<18:41:12,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1479/12318 [2:32:59<18:41:12,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1480/12318 [2:33:04<18:40:57,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1480/12318 [2:33:04<18:40:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1481/12318 [2:33:06<18:40:23,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1481/12318 [2:33:06<18:40:23,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1482/12318 [2:33:14<18:40:27,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1482/12318 [2:33:14<18:40:27,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1483/12318 [2:33:22<18:40:31,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1483/12318 [2:33:22<18:40:31,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1484/12318 [2:33:26<18:40:13,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1484/12318 [2:33:26<18:40:13,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1485/12318 [2:33:27<18:39:31,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1485/12318 [2:33:27<18:39:31,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1486/12318 [2:33:32<18:39:16,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1486/12318 [2:33:32<18:39:16,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1487/12318 [2:33:39<18:39:13,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1487/12318 [2:33:39<18:39:13,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1488/12318 [2:33:46<18:39:10,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1488/12318 [2:33:46<18:39:10,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1489/12318 [2:33:52<18:39:07,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1489/12318 [2:33:52<18:39:07,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1490/12318 [2:33:56<18:38:40,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1490/12318 [2:33:56<18:38:40,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1491/12318 [2:34:02<18:38:37,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1491/12318 [2:34:02<18:38:37,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1492/12318 [2:34:06<18:38:14,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1492/12318 [2:34:06<18:38:14,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1493/12318 [2:34:12<18:38:03,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1493/12318 [2:34:12<18:38:03,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1494/12318 [2:34:19<18:38:07,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1494/12318 [2:34:19<18:38:07,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1495/12318 [2:34:25<18:37:56,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1495/12318 [2:34:25<18:37:56,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1496/12318 [2:34:27<18:37:18,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1496/12318 [2:34:27<18:37:18,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1497/12318 [2:34:34<18:37:22,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1497/12318 [2:34:34<18:37:22,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1498/12318 [2:34:42<18:37:26,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1498/12318 [2:34:42<18:37:26,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1499/12318 [2:34:44<18:36:48,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1499/12318 [2:34:44<18:36:48,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1500/12318 [2:34:52<18:37:00,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1500/12318 [2:34:52<18:37:00,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1501/12318 [2:34:54<18:36:22,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1501/12318 [2:34:54<18:36:22,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1502/12318 [2:34:59<18:36:03,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1502/12318 [2:34:59<18:36:03,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1503/12318 [2:35:05<18:36:00,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1503/12318 [2:35:05<18:36:00,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1504/12318 [2:35:38<18:39:06,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1504/12318 [2:35:38<18:39:06,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1505/12318 [2:35:45<18:39:03,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1505/12318 [2:35:45<18:39:03,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1506/12318 [2:35:48<18:38:34,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1506/12318 [2:35:48<18:38:34,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1507/12318 [2:35:50<18:37:56,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1507/12318 [2:35:50<18:37:56,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1508/12318 [2:35:55<18:37:45,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1508/12318 [2:35:55<18:37:45,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1509/12318 [2:36:00<18:37:30,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1509/12318 [2:36:00<18:37:30,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1510/12318 [2:36:08<18:37:35,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1510/12318 [2:36:08<18:37:35,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1511/12318 [2:36:17<18:37:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1511/12318 [2:36:17<18:37:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1512/12318 [2:36:23<18:37:45,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1512/12318 [2:36:23<18:37:45,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1513/12318 [2:36:32<18:37:57,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1513/12318 [2:36:32<18:37:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1514/12318 [2:36:38<18:37:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1514/12318 [2:36:38<18:37:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1515/12318 [2:36:47<18:37:59,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1515/12318 [2:36:47<18:37:59,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1516/12318 [2:36:51<18:37:40,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1516/12318 [2:36:51<18:37:40,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1517/12318 [2:36:56<18:37:22,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1517/12318 [2:36:56<18:37:22,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1518/12318 [2:37:01<18:37:08,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1518/12318 [2:37:01<18:37:08,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1519/12318 [2:37:04<18:36:38,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1519/12318 [2:37:04<18:36:38,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1520/12318 [2:37:12<18:36:49,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1520/12318 [2:37:12<18:36:49,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1521/12318 [2:37:19<18:36:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1521/12318 [2:37:19<18:36:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1522/12318 [2:37:20<18:36:05,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1522/12318 [2:37:20<18:36:05,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1523/12318 [2:37:27<18:36:02,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1523/12318 [2:37:27<18:36:02,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1524/12318 [2:37:29<18:35:29,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1524/12318 [2:37:29<18:35:29,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1525/12318 [2:37:36<18:35:25,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1525/12318 [2:37:36<18:35:25,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1526/12318 [2:37:41<18:35:14,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1526/12318 [2:37:41<18:35:14,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1527/12318 [2:37:46<18:34:55,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1527/12318 [2:37:46<18:34:55,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1528/12318 [2:37:51<18:34:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1528/12318 [2:37:51<18:34:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1529/12318 [2:37:57<18:34:33,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1529/12318 [2:37:57<18:34:33,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1530/12318 [2:38:04<18:34:36,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1530/12318 [2:38:04<18:34:36,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1531/12318 [2:38:08<18:34:14,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1531/12318 [2:38:08<18:34:14,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1532/12318 [2:38:13<18:33:56,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1532/12318 [2:38:13<18:33:56,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1533/12318 [2:38:14<18:33:19,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1533/12318 [2:38:14<18:33:19,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1534/12318 [2:38:20<18:33:08,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1534/12318 [2:38:20<18:33:08,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1535/12318 [2:38:24<18:32:46,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1535/12318 [2:38:24<18:32:46,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1536/12318 [2:38:55<18:35:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1536/12318 [2:38:55<18:35:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1537/12318 [2:39:01<18:35:25,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1537/12318 [2:39:01<18:35:25,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1538/12318 [2:39:08<18:35:29,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1538/12318 [2:39:08<18:35:29,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  12%| | 1539/12318 [2:39:11<18:34:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  12%| | 1539/12318 [2:39:11<18:34:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1540/12318 [2:39:16<18:34:41,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1540/12318 [2:39:16<18:34:41,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1541/12318 [2:39:19<18:34:15,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1541/12318 [2:39:19<18:34:15,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1542/12318 [2:39:27<18:34:19,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1542/12318 [2:39:27<18:34:19,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1543/12318 [2:39:29<18:33:46,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1543/12318 [2:39:29<18:33:46,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1544/12318 [2:39:31<18:33:06,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1544/12318 [2:39:31<18:33:06,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1545/12318 [2:39:32<18:32:29,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1545/12318 [2:39:32<18:32:29,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1546/12318 [2:39:34<18:31:49,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1546/12318 [2:39:34<18:31:49,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1547/12318 [2:39:37<18:31:20,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1547/12318 [2:39:37<18:31:20,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1548/12318 [2:39:43<18:31:18,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1548/12318 [2:39:43<18:31:18,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1549/12318 [2:39:48<18:31:00,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1549/12318 [2:39:48<18:31:00,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1550/12318 [2:39:56<18:31:04,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1550/12318 [2:39:56<18:31:04,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1551/12318 [2:40:00<18:30:46,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1551/12318 [2:40:00<18:30:46,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1552/12318 [2:40:06<18:30:36,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1552/12318 [2:40:06<18:30:36,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1553/12318 [2:40:14<18:30:47,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1553/12318 [2:40:14<18:30:47,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1554/12318 [2:40:20<18:30:36,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1554/12318 [2:40:20<18:30:36,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1555/12318 [2:40:27<18:30:40,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1555/12318 [2:40:27<18:30:40,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1556/12318 [2:40:33<18:30:30,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1556/12318 [2:40:33<18:30:30,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1557/12318 [2:40:36<18:30:04,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1557/12318 [2:40:36<18:30:04,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1558/12318 [2:40:38<18:29:28,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1558/12318 [2:40:38<18:29:28,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1559/12318 [2:40:44<18:29:17,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1559/12318 [2:40:44<18:29:17,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1560/12318 [2:40:48<18:28:56,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1560/12318 [2:40:48<18:28:56,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1561/12318 [2:40:49<18:28:16,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1561/12318 [2:40:49<18:28:16,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1562/12318 [2:40:57<18:28:20,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1562/12318 [2:40:57<18:28:20,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1563/12318 [2:41:06<18:28:32,  6.18s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1563/12318 [2:41:06<18:28:32,  6.18s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1564/12318 [2:41:15<18:28:45,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1564/12318 [2:41:15<18:28:45,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1565/12318 [2:41:21<18:28:42,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1565/12318 [2:41:21<18:28:42,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1566/12318 [2:41:28<18:28:39,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1566/12318 [2:41:28<18:28:39,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1567/12318 [2:41:34<18:28:36,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1567/12318 [2:41:34<18:28:36,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1568/12318 [2:42:19<18:32:52,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1568/12318 [2:42:19<18:32:52,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1569/12318 [2:42:28<18:33:03,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1569/12318 [2:42:28<18:33:03,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1570/12318 [2:42:35<18:33:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1570/12318 [2:42:35<18:33:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1571/12318 [2:42:39<18:32:41,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1571/12318 [2:42:39<18:32:41,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1572/12318 [2:42:47<18:32:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1572/12318 [2:42:47<18:32:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1573/12318 [2:42:50<18:32:20,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1573/12318 [2:42:50<18:32:20,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1574/12318 [2:42:57<18:32:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1574/12318 [2:42:57<18:32:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1575/12318 [2:43:02<18:32:03,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1575/12318 [2:43:02<18:32:03,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1576/12318 [2:43:09<18:32:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1576/12318 [2:43:09<18:32:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1577/12318 [2:43:15<18:31:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1577/12318 [2:43:15<18:31:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1578/12318 [2:43:23<18:32:01,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1578/12318 [2:43:23<18:32:01,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1579/12318 [2:43:30<18:32:05,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1579/12318 [2:43:30<18:32:05,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1580/12318 [2:43:37<18:32:02,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1580/12318 [2:43:37<18:32:02,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1581/12318 [2:43:42<18:31:44,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1581/12318 [2:43:42<18:31:44,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1582/12318 [2:43:45<18:31:19,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1582/12318 [2:43:45<18:31:19,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1583/12318 [2:43:49<18:31:01,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1583/12318 [2:43:49<18:31:01,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1584/12318 [2:43:56<18:30:57,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1584/12318 [2:43:56<18:30:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1585/12318 [2:44:05<18:31:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1585/12318 [2:44:05<18:31:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1586/12318 [2:44:12<18:31:10,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1586/12318 [2:44:12<18:31:10,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1587/12318 [2:44:19<18:31:06,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1587/12318 [2:44:19<18:31:06,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1588/12318 [2:44:23<18:30:48,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1588/12318 [2:44:23<18:30:48,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1589/12318 [2:44:28<18:30:34,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1589/12318 [2:44:28<18:30:34,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1590/12318 [2:44:34<18:30:23,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1590/12318 [2:44:34<18:30:23,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1591/12318 [2:44:36<18:29:51,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1591/12318 [2:44:36<18:29:51,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1592/12318 [2:44:39<18:29:23,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1592/12318 [2:44:39<18:29:23,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1593/12318 [2:44:46<18:29:20,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1593/12318 [2:44:46<18:29:20,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1594/12318 [2:44:49<18:28:51,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1594/12318 [2:44:49<18:28:51,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1595/12318 [2:44:50<18:28:12,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1595/12318 [2:44:50<18:28:12,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1596/12318 [2:44:53<18:27:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1596/12318 [2:44:53<18:27:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1597/12318 [2:44:57<18:27:23,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1597/12318 [2:44:57<18:27:23,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1598/12318 [2:45:01<18:27:01,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1598/12318 [2:45:01<18:27:01,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1599/12318 [2:45:04<18:26:33,  6.19s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1599/12318 [2:45:04<18:26:33,  6.19s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1600/12318 [2:45:28<18:28:25,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1600/12318 [2:45:28<18:28:25,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1601/12318 [2:45:51<18:30:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1601/12318 [2:45:51<18:30:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1602/12318 [2:45:52<18:29:33,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1602/12318 [2:45:52<18:29:33,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1603/12318 [2:46:01<18:29:44,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1603/12318 [2:46:01<18:29:44,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1604/12318 [2:46:06<18:29:34,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1604/12318 [2:46:06<18:29:34,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1605/12318 [2:46:12<18:29:24,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1605/12318 [2:46:12<18:29:24,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1606/12318 [2:46:15<18:28:54,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1606/12318 [2:46:15<18:28:54,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1607/12318 [2:46:21<18:28:51,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1607/12318 [2:46:21<18:28:51,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1608/12318 [2:46:28<18:28:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1608/12318 [2:46:28<18:28:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1609/12318 [2:46:36<18:28:52,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1609/12318 [2:46:36<18:28:52,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1610/12318 [2:46:38<18:28:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1610/12318 [2:46:38<18:28:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1611/12318 [2:46:40<18:27:42,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1611/12318 [2:46:40<18:27:42,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1612/12318 [2:46:47<18:27:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1612/12318 [2:46:47<18:27:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1613/12318 [2:46:54<18:27:43,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1613/12318 [2:46:54<18:27:43,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1614/12318 [2:47:01<18:27:40,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1614/12318 [2:47:01<18:27:40,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1615/12318 [2:47:05<18:27:22,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1615/12318 [2:47:05<18:27:22,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1616/12318 [2:47:11<18:27:12,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1616/12318 [2:47:11<18:27:12,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1617/12318 [2:47:15<18:26:51,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1617/12318 [2:47:15<18:26:51,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1618/12318 [2:47:21<18:26:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1618/12318 [2:47:21<18:26:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1619/12318 [2:47:30<18:26:57,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1619/12318 [2:47:30<18:26:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1620/12318 [2:47:38<18:27:00,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1620/12318 [2:47:38<18:27:00,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1621/12318 [2:47:44<18:26:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1621/12318 [2:47:44<18:26:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1622/12318 [2:47:53<18:27:06,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1622/12318 [2:47:53<18:27:06,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1623/12318 [2:47:54<18:26:27,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1623/12318 [2:47:54<18:26:27,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1624/12318 [2:48:02<18:26:31,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1624/12318 [2:48:02<18:26:31,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1625/12318 [2:48:07<18:26:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1625/12318 [2:48:07<18:26:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1626/12318 [2:48:12<18:26:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1626/12318 [2:48:12<18:26:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1627/12318 [2:48:16<18:25:43,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1627/12318 [2:48:16<18:25:43,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1628/12318 [2:48:25<18:25:54,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1628/12318 [2:48:25<18:25:54,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1629/12318 [2:48:28<18:25:29,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1629/12318 [2:48:28<18:25:29,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1630/12318 [2:48:34<18:25:19,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1630/12318 [2:48:34<18:25:19,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1631/12318 [2:48:43<18:25:30,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1631/12318 [2:48:43<18:25:30,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1632/12318 [2:49:12<18:27:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1632/12318 [2:49:12<18:27:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1633/12318 [2:49:19<18:27:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1633/12318 [2:49:19<18:27:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1634/12318 [2:49:20<18:27:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1634/12318 [2:49:20<18:27:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1635/12318 [2:49:25<18:27:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1635/12318 [2:49:25<18:27:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1636/12318 [2:49:32<18:26:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1636/12318 [2:49:32<18:26:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1637/12318 [2:49:41<18:27:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1637/12318 [2:49:41<18:27:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1638/12318 [2:49:47<18:27:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1638/12318 [2:49:47<18:27:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1639/12318 [2:49:55<18:27:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1639/12318 [2:49:55<18:27:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1640/12318 [2:50:00<18:26:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1640/12318 [2:50:00<18:26:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1641/12318 [2:50:01<18:26:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1641/12318 [2:50:01<18:26:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1642/12318 [2:50:08<18:26:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1642/12318 [2:50:08<18:26:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1643/12318 [2:50:16<18:26:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1643/12318 [2:50:16<18:26:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1644/12318 [2:50:17<18:25:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1644/12318 [2:50:17<18:25:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1645/12318 [2:50:25<18:25:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1645/12318 [2:50:25<18:25:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1646/12318 [2:50:28<18:25:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1646/12318 [2:50:28<18:25:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1647/12318 [2:50:33<18:25:03,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1647/12318 [2:50:33<18:25:03,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1648/12318 [2:50:42<18:25:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1648/12318 [2:50:42<18:25:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1649/12318 [2:50:44<18:24:40,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1649/12318 [2:50:44<18:24:40,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1650/12318 [2:50:46<18:24:05,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1650/12318 [2:50:46<18:24:05,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1651/12318 [2:50:53<18:24:09,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1651/12318 [2:50:53<18:24:09,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1652/12318 [2:50:58<18:23:55,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1652/12318 [2:50:58<18:23:55,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1653/12318 [2:51:04<18:23:44,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1653/12318 [2:51:04<18:23:44,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1654/12318 [2:51:10<18:23:41,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1654/12318 [2:51:10<18:23:41,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1655/12318 [2:51:17<18:23:37,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1655/12318 [2:51:17<18:23:37,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1656/12318 [2:51:22<18:23:20,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1656/12318 [2:51:22<18:23:20,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1657/12318 [2:51:27<18:23:09,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1657/12318 [2:51:27<18:23:09,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1658/12318 [2:51:30<18:22:38,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1658/12318 [2:51:30<18:22:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1659/12318 [2:51:33<18:22:14,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1659/12318 [2:51:33<18:22:14,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1660/12318 [2:51:41<18:22:18,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1660/12318 [2:51:41<18:22:18,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1661/12318 [2:51:42<18:21:43,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1661/12318 [2:51:42<18:21:43,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  13%|▏| 1662/12318 [2:51:48<18:21:33,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  13%|▏| 1662/12318 [2:51:48<18:21:33,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1663/12318 [2:51:57<18:21:42,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1663/12318 [2:51:57<18:21:42,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1664/12318 [2:52:17<18:23:08,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1664/12318 [2:52:17<18:23:08,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1665/12318 [2:52:20<18:22:37,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1665/12318 [2:52:20<18:22:37,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1666/12318 [2:52:28<18:22:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1666/12318 [2:52:28<18:22:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1667/12318 [2:52:30<18:22:15,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1667/12318 [2:52:30<18:22:15,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1668/12318 [2:52:37<18:22:11,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1668/12318 [2:52:37<18:22:11,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1669/12318 [2:52:44<18:22:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1669/12318 [2:52:44<18:22:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1670/12318 [2:52:46<18:21:40,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1670/12318 [2:52:46<18:21:40,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1671/12318 [2:52:52<18:21:29,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1671/12318 [2:52:52<18:21:29,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1672/12318 [2:52:58<18:21:25,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1672/12318 [2:52:58<18:21:25,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1673/12318 [2:53:04<18:21:14,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1673/12318 [2:53:04<18:21:14,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1674/12318 [2:53:11<18:21:10,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1674/12318 [2:53:11<18:21:10,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1675/12318 [2:53:16<18:21:00,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1675/12318 [2:53:16<18:21:00,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1676/12318 [2:53:22<18:20:49,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1676/12318 [2:53:22<18:20:49,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1677/12318 [2:53:24<18:20:18,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1677/12318 [2:53:24<18:20:18,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1678/12318 [2:53:33<18:20:28,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1678/12318 [2:53:33<18:20:28,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1679/12318 [2:53:38<18:20:18,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1679/12318 [2:53:38<18:20:18,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1680/12318 [2:53:42<18:19:57,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1680/12318 [2:53:42<18:19:57,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1681/12318 [2:53:48<18:19:46,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1681/12318 [2:53:48<18:19:46,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1682/12318 [2:53:53<18:19:35,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1682/12318 [2:53:53<18:19:35,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1683/12318 [2:54:02<18:19:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1683/12318 [2:54:02<18:19:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1684/12318 [2:54:07<18:19:30,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1684/12318 [2:54:07<18:19:30,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1685/12318 [2:54:12<18:19:19,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1685/12318 [2:54:12<18:19:19,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1686/12318 [2:54:19<18:19:15,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1686/12318 [2:54:19<18:19:15,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1687/12318 [2:54:24<18:19:04,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1687/12318 [2:54:24<18:19:04,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1688/12318 [2:54:32<18:19:06,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1688/12318 [2:54:32<18:19:06,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1689/12318 [2:54:36<18:18:49,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1689/12318 [2:54:36<18:18:49,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1690/12318 [2:54:45<18:18:58,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1690/12318 [2:54:45<18:18:58,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1691/12318 [2:54:50<18:18:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1691/12318 [2:54:50<18:18:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1692/12318 [2:54:56<18:18:40,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1692/12318 [2:54:56<18:18:40,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1693/12318 [2:55:05<18:18:48,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1693/12318 [2:55:05<18:18:48,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1694/12318 [2:55:08<18:18:21,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1694/12318 [2:55:08<18:18:21,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1695/12318 [2:55:09<18:17:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1695/12318 [2:55:09<18:17:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1696/12318 [2:55:49<18:21:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1696/12318 [2:55:49<18:21:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1697/12318 [2:55:53<18:20:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1697/12318 [2:55:53<18:20:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1698/12318 [2:55:59<18:20:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1698/12318 [2:55:59<18:20:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1699/12318 [2:56:05<18:20:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1699/12318 [2:56:05<18:20:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1700/12318 [2:56:07<18:20:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1700/12318 [2:56:07<18:20:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1701/12318 [2:56:13<18:19:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1701/12318 [2:56:13<18:19:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1702/12318 [2:56:15<18:19:25,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1702/12318 [2:56:15<18:19:25,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1703/12318 [2:56:24<18:19:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1703/12318 [2:56:24<18:19:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1704/12318 [2:56:33<18:19:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1704/12318 [2:56:33<18:19:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1705/12318 [2:56:37<18:19:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1705/12318 [2:56:37<18:19:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1706/12318 [2:56:46<18:19:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1706/12318 [2:56:46<18:19:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1707/12318 [2:56:55<18:19:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1707/12318 [2:56:55<18:19:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1708/12318 [2:57:00<18:19:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1708/12318 [2:57:00<18:19:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1709/12318 [2:57:08<18:19:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1709/12318 [2:57:08<18:19:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1710/12318 [2:57:09<18:18:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1710/12318 [2:57:09<18:18:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1711/12318 [2:57:13<18:18:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1711/12318 [2:57:13<18:18:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1712/12318 [2:57:19<18:18:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1712/12318 [2:57:19<18:18:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1713/12318 [2:57:22<18:18:08,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1713/12318 [2:57:22<18:18:08,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1714/12318 [2:57:28<18:17:58,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1714/12318 [2:57:28<18:17:58,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1715/12318 [2:57:33<18:17:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1715/12318 [2:57:33<18:17:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1716/12318 [2:57:37<18:17:24,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1716/12318 [2:57:37<18:17:24,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1717/12318 [2:57:45<18:17:33,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1717/12318 [2:57:45<18:17:33,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1718/12318 [2:57:53<18:17:35,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1718/12318 [2:57:53<18:17:35,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1719/12318 [2:58:02<18:17:44,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1719/12318 [2:58:02<18:17:44,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1720/12318 [2:58:09<18:17:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1720/12318 [2:58:09<18:17:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1721/12318 [2:58:13<18:17:26,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1721/12318 [2:58:13<18:17:26,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1722/12318 [2:58:18<18:17:12,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1722/12318 [2:58:18<18:17:12,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1723/12318 [2:58:25<18:17:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1723/12318 [2:58:25<18:17:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1724/12318 [2:58:29<18:16:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1724/12318 [2:58:29<18:16:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1725/12318 [2:58:34<18:16:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1725/12318 [2:58:34<18:16:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1726/12318 [2:58:39<18:16:19,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1726/12318 [2:58:39<18:16:19,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1727/12318 [2:58:44<18:16:09,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1727/12318 [2:58:44<18:16:09,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1728/12318 [2:59:11<18:18:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1728/12318 [2:59:11<18:18:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1729/12318 [2:59:17<18:18:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1729/12318 [2:59:17<18:18:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1730/12318 [2:59:19<18:17:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1730/12318 [2:59:19<18:17:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1731/12318 [2:59:25<18:17:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1731/12318 [2:59:25<18:17:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1732/12318 [2:59:27<18:16:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1732/12318 [2:59:27<18:16:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1733/12318 [2:59:34<18:16:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1733/12318 [2:59:34<18:16:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1734/12318 [2:59:38<18:16:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1734/12318 [2:59:38<18:16:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1735/12318 [2:59:43<18:16:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1735/12318 [2:59:43<18:16:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1736/12318 [2:59:47<18:15:58,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1736/12318 [2:59:47<18:15:58,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1737/12318 [2:59:49<18:15:25,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1737/12318 [2:59:49<18:15:25,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1738/12318 [2:59:56<18:15:21,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1738/12318 [2:59:56<18:15:21,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1739/12318 [3:00:00<18:15:04,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1739/12318 [3:00:00<18:15:04,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1740/12318 [3:00:05<18:14:50,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1740/12318 [3:00:05<18:14:50,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1741/12318 [3:00:13<18:14:53,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1741/12318 [3:00:13<18:14:53,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1742/12318 [3:00:20<18:14:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1742/12318 [3:00:20<18:14:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1743/12318 [3:00:25<18:14:39,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1743/12318 [3:00:25<18:14:39,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1744/12318 [3:00:32<18:14:35,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1744/12318 [3:00:32<18:14:35,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1745/12318 [3:00:34<18:14:08,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1745/12318 [3:00:34<18:14:08,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1746/12318 [3:00:39<18:13:55,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1746/12318 [3:00:39<18:13:55,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1747/12318 [3:00:43<18:13:35,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1747/12318 [3:00:43<18:13:35,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1748/12318 [3:00:51<18:13:37,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1748/12318 [3:00:51<18:13:37,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1749/12318 [3:00:56<18:13:23,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1749/12318 [3:00:56<18:13:23,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1750/12318 [3:00:59<18:12:57,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1750/12318 [3:00:59<18:12:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1751/12318 [3:01:06<18:13:00,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1751/12318 [3:01:06<18:13:00,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1752/12318 [3:01:13<18:12:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1752/12318 [3:01:13<18:12:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1753/12318 [3:01:16<18:12:30,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1753/12318 [3:01:16<18:12:30,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1754/12318 [3:01:21<18:12:16,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1754/12318 [3:01:21<18:12:16,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1755/12318 [3:01:26<18:12:05,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1755/12318 [3:01:26<18:12:05,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1756/12318 [3:01:32<18:11:55,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1756/12318 [3:01:32<18:11:55,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1757/12318 [3:01:40<18:11:58,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1757/12318 [3:01:40<18:11:58,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1758/12318 [3:01:48<18:12:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1758/12318 [3:01:48<18:12:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1759/12318 [3:01:52<18:11:47,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1759/12318 [3:01:52<18:11:47,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1760/12318 [3:02:30<18:14:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1760/12318 [3:02:30<18:14:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1761/12318 [3:02:35<18:14:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1761/12318 [3:02:35<18:14:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1762/12318 [3:02:38<18:14:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1762/12318 [3:02:38<18:14:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1763/12318 [3:02:41<18:13:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1763/12318 [3:02:41<18:13:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1764/12318 [3:02:45<18:13:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1764/12318 [3:02:45<18:13:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1765/12318 [3:02:47<18:12:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1765/12318 [3:02:47<18:12:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1766/12318 [3:02:56<18:13:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1766/12318 [3:02:56<18:13:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1767/12318 [3:02:59<18:12:37,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1767/12318 [3:02:59<18:12:37,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1768/12318 [3:03:05<18:12:33,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1768/12318 [3:03:05<18:12:33,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1769/12318 [3:03:12<18:12:30,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1769/12318 [3:03:12<18:12:30,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1770/12318 [3:03:16<18:12:10,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1770/12318 [3:03:16<18:12:10,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1771/12318 [3:03:21<18:11:57,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1771/12318 [3:03:21<18:11:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1772/12318 [3:03:30<18:12:06,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1772/12318 [3:03:30<18:12:06,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1773/12318 [3:03:37<18:12:09,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1773/12318 [3:03:37<18:12:09,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1774/12318 [3:03:46<18:12:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1774/12318 [3:03:46<18:12:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1775/12318 [3:03:54<18:12:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1775/12318 [3:03:54<18:12:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1776/12318 [3:03:59<18:12:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1776/12318 [3:03:59<18:12:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1777/12318 [3:04:06<18:12:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1777/12318 [3:04:06<18:12:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1778/12318 [3:04:07<18:11:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1778/12318 [3:04:07<18:11:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1779/12318 [3:04:13<18:11:19,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1779/12318 [3:04:13<18:11:19,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1780/12318 [3:04:19<18:11:15,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1780/12318 [3:04:19<18:11:15,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1781/12318 [3:04:24<18:11:01,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1781/12318 [3:04:24<18:11:01,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1782/12318 [3:04:29<18:10:45,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1782/12318 [3:04:29<18:10:45,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1783/12318 [3:04:35<18:10:41,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1783/12318 [3:04:35<18:10:41,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1784/12318 [3:04:41<18:10:31,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1784/12318 [3:04:41<18:10:31,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1785/12318 [3:04:48<18:10:33,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1785/12318 [3:04:48<18:10:33,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  14%|▏| 1786/12318 [3:04:52<18:10:14,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  14%|▏| 1786/12318 [3:04:52<18:10:14,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1787/12318 [3:04:58<18:10:04,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1787/12318 [3:04:58<18:10:04,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1788/12318 [3:05:02<18:09:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1788/12318 [3:05:02<18:09:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1789/12318 [3:05:10<18:09:50,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1789/12318 [3:05:10<18:09:50,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1790/12318 [3:05:19<18:09:59,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1790/12318 [3:05:19<18:09:59,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1791/12318 [3:05:25<18:09:55,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1791/12318 [3:05:25<18:09:55,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1792/12318 [3:05:50<18:11:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1792/12318 [3:05:50<18:11:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1793/12318 [3:05:59<18:11:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1793/12318 [3:05:59<18:11:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1794/12318 [3:06:03<18:11:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1794/12318 [3:06:03<18:11:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1795/12318 [3:06:09<18:11:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1795/12318 [3:06:09<18:11:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1796/12318 [3:06:11<18:10:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1796/12318 [3:06:11<18:10:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1797/12318 [3:06:17<18:10:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1797/12318 [3:06:17<18:10:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1798/12318 [3:06:21<18:10:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1798/12318 [3:06:21<18:10:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1799/12318 [3:06:23<18:09:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1799/12318 [3:06:23<18:09:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1800/12318 [3:06:25<18:09:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1800/12318 [3:06:25<18:09:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1801/12318 [3:06:30<18:09:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1801/12318 [3:06:30<18:09:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1802/12318 [3:06:39<18:09:16,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1802/12318 [3:06:39<18:09:16,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1803/12318 [3:06:42<18:08:51,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1803/12318 [3:06:42<18:08:51,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1804/12318 [3:06:45<18:08:29,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1804/12318 [3:06:45<18:08:29,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1805/12318 [3:06:51<18:08:19,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1805/12318 [3:06:51<18:08:19,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1806/12318 [3:06:57<18:08:15,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1806/12318 [3:06:57<18:08:15,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1807/12318 [3:07:06<18:08:23,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1807/12318 [3:07:06<18:08:23,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1808/12318 [3:07:14<18:08:25,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1808/12318 [3:07:14<18:08:25,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1809/12318 [3:07:17<18:08:03,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1809/12318 [3:07:17<18:08:03,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1810/12318 [3:07:26<18:08:11,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1810/12318 [3:07:26<18:08:11,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1811/12318 [3:07:29<18:07:48,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1811/12318 [3:07:29<18:07:48,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1812/12318 [3:07:35<18:07:38,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1812/12318 [3:07:35<18:07:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1813/12318 [3:07:43<18:07:41,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1813/12318 [3:07:43<18:07:41,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1814/12318 [3:07:50<18:07:43,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1814/12318 [3:07:50<18:07:43,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1815/12318 [3:07:58<18:07:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1815/12318 [3:07:58<18:07:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1816/12318 [3:08:07<18:07:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1816/12318 [3:08:07<18:07:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1817/12318 [3:08:14<18:07:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1817/12318 [3:08:14<18:07:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1818/12318 [3:08:19<18:07:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1818/12318 [3:08:19<18:07:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1819/12318 [3:08:26<18:07:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1819/12318 [3:08:26<18:07:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1820/12318 [3:08:32<18:07:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1820/12318 [3:08:32<18:07:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1821/12318 [3:08:39<18:07:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1821/12318 [3:08:39<18:07:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1822/12318 [3:08:44<18:07:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1822/12318 [3:08:44<18:07:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1823/12318 [3:08:45<18:06:41,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1823/12318 [3:08:45<18:06:41,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1824/12318 [3:09:11<18:08:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1824/12318 [3:09:11<18:08:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1825/12318 [3:09:17<18:08:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1825/12318 [3:09:17<18:08:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1826/12318 [3:09:23<18:08:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1826/12318 [3:09:23<18:08:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1827/12318 [3:09:26<18:07:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1827/12318 [3:09:26<18:07:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1828/12318 [3:09:29<18:07:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1828/12318 [3:09:29<18:07:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1829/12318 [3:09:34<18:07:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1829/12318 [3:09:34<18:07:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1830/12318 [3:09:38<18:06:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1830/12318 [3:09:38<18:06:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1831/12318 [3:09:44<18:06:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1831/12318 [3:09:44<18:06:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1832/12318 [3:09:48<18:06:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1832/12318 [3:09:48<18:06:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1833/12318 [3:09:53<18:06:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1833/12318 [3:09:53<18:06:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1834/12318 [3:09:59<18:06:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1834/12318 [3:09:59<18:06:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1835/12318 [3:10:04<18:05:50,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1835/12318 [3:10:04<18:05:50,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1836/12318 [3:10:08<18:05:34,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1836/12318 [3:10:08<18:05:34,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1837/12318 [3:10:14<18:05:24,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1837/12318 [3:10:14<18:05:24,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1838/12318 [3:10:23<18:05:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1838/12318 [3:10:23<18:05:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1839/12318 [3:10:29<18:05:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1839/12318 [3:10:29<18:05:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1840/12318 [3:10:38<18:05:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1840/12318 [3:10:38<18:05:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1841/12318 [3:10:42<18:05:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1841/12318 [3:10:42<18:05:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1842/12318 [3:10:47<18:05:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1842/12318 [3:10:47<18:05:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1843/12318 [3:10:54<18:05:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1843/12318 [3:10:54<18:05:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1844/12318 [3:11:02<18:05:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1844/12318 [3:11:02<18:05:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1845/12318 [3:11:10<18:05:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1845/12318 [3:11:10<18:05:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1846/12318 [3:11:15<18:04:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1846/12318 [3:11:15<18:04:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1847/12318 [3:11:23<18:05:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1847/12318 [3:11:23<18:05:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1848/12318 [3:11:26<18:04:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1848/12318 [3:11:26<18:04:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1849/12318 [3:11:31<18:04:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1849/12318 [3:11:31<18:04:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1850/12318 [3:11:39<18:04:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1850/12318 [3:11:39<18:04:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1851/12318 [3:11:44<18:04:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1851/12318 [3:11:44<18:04:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1852/12318 [3:11:50<18:04:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1852/12318 [3:11:50<18:04:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1853/12318 [3:11:56<18:03:59,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1853/12318 [3:11:56<18:03:59,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1854/12318 [3:12:01<18:03:49,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1854/12318 [3:12:01<18:03:49,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1855/12318 [3:12:08<18:03:45,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1855/12318 [3:12:08<18:03:45,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1856/12318 [3:12:24<18:04:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1856/12318 [3:12:24<18:04:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1857/12318 [3:12:30<18:04:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1857/12318 [3:12:30<18:04:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1858/12318 [3:12:36<18:04:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1858/12318 [3:12:36<18:04:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1859/12318 [3:12:39<18:03:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1859/12318 [3:12:39<18:03:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1860/12318 [3:12:43<18:03:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1860/12318 [3:12:43<18:03:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1861/12318 [3:12:52<18:03:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1861/12318 [3:12:52<18:03:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1862/12318 [3:12:59<18:03:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1862/12318 [3:12:59<18:03:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1863/12318 [3:13:06<18:03:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1863/12318 [3:13:06<18:03:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1864/12318 [3:13:09<18:03:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1864/12318 [3:13:09<18:03:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1865/12318 [3:13:18<18:03:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1865/12318 [3:13:18<18:03:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1866/12318 [3:13:21<18:03:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1866/12318 [3:13:21<18:03:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1867/12318 [3:13:27<18:02:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1867/12318 [3:13:27<18:02:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1868/12318 [3:13:33<18:02:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1868/12318 [3:13:33<18:02:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1869/12318 [3:13:40<18:02:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1869/12318 [3:13:40<18:02:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1870/12318 [3:13:48<18:02:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1870/12318 [3:13:48<18:02:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1871/12318 [3:13:50<18:02:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1871/12318 [3:13:50<18:02:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1872/12318 [3:13:53<18:01:57,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1872/12318 [3:13:53<18:01:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1873/12318 [3:13:57<18:01:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1873/12318 [3:13:57<18:01:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1874/12318 [3:14:00<18:01:14,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1874/12318 [3:14:00<18:01:14,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1875/12318 [3:14:09<18:01:21,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1875/12318 [3:14:09<18:01:21,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1876/12318 [3:14:12<18:00:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1876/12318 [3:14:12<18:00:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1877/12318 [3:14:18<18:00:52,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1877/12318 [3:14:18<18:00:52,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1878/12318 [3:14:23<18:00:39,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1878/12318 [3:14:23<18:00:39,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1879/12318 [3:14:32<18:00:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1879/12318 [3:14:32<18:00:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1880/12318 [3:14:37<18:00:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1880/12318 [3:14:37<18:00:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1881/12318 [3:14:45<18:00:38,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1881/12318 [3:14:45<18:00:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1882/12318 [3:14:54<18:00:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1882/12318 [3:14:54<18:00:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1883/12318 [3:14:59<18:00:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1883/12318 [3:14:59<18:00:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1884/12318 [3:15:08<18:00:43,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1884/12318 [3:15:08<18:00:43,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1885/12318 [3:15:14<18:00:34,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1885/12318 [3:15:14<18:00:34,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1886/12318 [3:15:19<18:00:24,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1886/12318 [3:15:19<18:00:24,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1887/12318 [3:15:23<18:00:05,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1887/12318 [3:15:23<18:00:05,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1888/12318 [3:16:16<18:04:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1888/12318 [3:16:16<18:04:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1889/12318 [3:16:19<18:03:55,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1889/12318 [3:16:19<18:03:55,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1890/12318 [3:16:26<18:03:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1890/12318 [3:16:26<18:03:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1891/12318 [3:16:32<18:03:41,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1891/12318 [3:16:32<18:03:41,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1892/12318 [3:16:37<18:03:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1892/12318 [3:16:37<18:03:31,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1893/12318 [3:16:41<18:03:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1893/12318 [3:16:41<18:03:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1894/12318 [3:16:46<18:02:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1894/12318 [3:16:46<18:02:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1895/12318 [3:16:53<18:02:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1895/12318 [3:16:53<18:02:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1896/12318 [3:16:56<18:02:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1896/12318 [3:16:56<18:02:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1897/12318 [3:17:01<18:02:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1897/12318 [3:17:01<18:02:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1898/12318 [3:17:02<18:01:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1898/12318 [3:17:02<18:01:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1899/12318 [3:17:04<18:01:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1899/12318 [3:17:04<18:01:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1900/12318 [3:17:10<18:01:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1900/12318 [3:17:10<18:01:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1901/12318 [3:17:14<18:00:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1901/12318 [3:17:14<18:00:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1902/12318 [3:17:22<18:00:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1902/12318 [3:17:22<18:00:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1903/12318 [3:17:27<18:00:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1903/12318 [3:17:27<18:00:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1904/12318 [3:17:34<18:00:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1904/12318 [3:17:34<18:00:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1905/12318 [3:17:43<18:00:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1905/12318 [3:17:43<18:00:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1906/12318 [3:17:48<18:00:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1906/12318 [3:17:48<18:00:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1907/12318 [3:17:55<18:00:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1907/12318 [3:17:55<18:00:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1908/12318 [3:18:01<18:00:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1908/12318 [3:18:01<18:00:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  15%|▏| 1909/12318 [3:18:10<18:00:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  15%|▏| 1909/12318 [3:18:10<18:00:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1910/12318 [3:18:11<18:00:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1910/12318 [3:18:11<18:00:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1911/12318 [3:18:18<17:59:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1911/12318 [3:18:18<17:59:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1912/12318 [3:18:20<17:59:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1912/12318 [3:18:20<17:59:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1913/12318 [3:18:27<17:59:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1913/12318 [3:18:27<17:59:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1914/12318 [3:18:30<17:59:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1914/12318 [3:18:30<17:59:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1915/12318 [3:18:37<17:58:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1915/12318 [3:18:37<17:58:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1916/12318 [3:18:38<17:58:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1916/12318 [3:18:38<17:58:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1917/12318 [3:18:45<17:58:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1917/12318 [3:18:45<17:58:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1918/12318 [3:18:52<17:58:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1918/12318 [3:18:52<17:58:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1919/12318 [3:18:55<17:57:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1919/12318 [3:18:55<17:57:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1920/12318 [3:19:29<18:00:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1920/12318 [3:19:29<18:00:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1921/12318 [3:19:34<18:00:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1921/12318 [3:19:34<18:00:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1922/12318 [3:19:35<17:59:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1922/12318 [3:19:35<17:59:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1923/12318 [3:19:44<17:59:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1923/12318 [3:19:44<17:59:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1924/12318 [3:19:46<17:59:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1924/12318 [3:19:46<17:59:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1925/12318 [3:19:52<17:59:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1925/12318 [3:19:52<17:59:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1926/12318 [3:19:58<17:59:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1926/12318 [3:19:58<17:59:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1927/12318 [3:20:02<17:58:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1927/12318 [3:20:02<17:58:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1928/12318 [3:20:10<17:58:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1928/12318 [3:20:10<17:58:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1929/12318 [3:20:11<17:58:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1929/12318 [3:20:11<17:58:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1930/12318 [3:20:16<17:57:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1930/12318 [3:20:16<17:57:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1931/12318 [3:20:20<17:57:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1931/12318 [3:20:20<17:57:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1932/12318 [3:20:25<17:57:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1932/12318 [3:20:25<17:57:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1933/12318 [3:20:26<17:56:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1933/12318 [3:20:26<17:56:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1934/12318 [3:20:30<17:56:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1934/12318 [3:20:30<17:56:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1935/12318 [3:20:32<17:56:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1935/12318 [3:20:32<17:56:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1936/12318 [3:20:39<17:56:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1936/12318 [3:20:39<17:56:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1937/12318 [3:20:43<17:55:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1937/12318 [3:20:43<17:55:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1938/12318 [3:20:46<17:55:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1938/12318 [3:20:46<17:55:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1939/12318 [3:20:52<17:55:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1939/12318 [3:20:52<17:55:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1940/12318 [3:21:00<17:55:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1940/12318 [3:21:00<17:55:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1941/12318 [3:21:09<17:55:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1941/12318 [3:21:09<17:55:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1942/12318 [3:21:17<17:55:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1942/12318 [3:21:17<17:55:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1943/12318 [3:21:23<17:55:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1943/12318 [3:21:23<17:55:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1944/12318 [3:21:28<17:55:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1944/12318 [3:21:28<17:55:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1945/12318 [3:21:33<17:54:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1945/12318 [3:21:33<17:54:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1946/12318 [3:21:35<17:54:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1946/12318 [3:21:35<17:54:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1947/12318 [3:21:38<17:54:03,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1947/12318 [3:21:38<17:54:03,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1948/12318 [3:21:42<17:53:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1948/12318 [3:21:42<17:53:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1949/12318 [3:21:46<17:53:26,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1949/12318 [3:21:46<17:53:26,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1950/12318 [3:21:52<17:53:22,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1950/12318 [3:21:52<17:53:22,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1951/12318 [3:22:00<17:53:23,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1951/12318 [3:22:00<17:53:24,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1952/12318 [3:22:50<17:57:12,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1952/12318 [3:22:50<17:57:12,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1953/12318 [3:22:58<17:57:13,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1953/12318 [3:22:58<17:57:13,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1954/12318 [3:23:01<17:56:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1954/12318 [3:23:01<17:56:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1955/12318 [3:23:07<17:56:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1955/12318 [3:23:07<17:56:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1956/12318 [3:23:11<17:56:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1956/12318 [3:23:11<17:56:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1957/12318 [3:23:16<17:56:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1957/12318 [3:23:16<17:56:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1958/12318 [3:23:19<17:55:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1958/12318 [3:23:19<17:55:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1959/12318 [3:23:24<17:55:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1959/12318 [3:23:24<17:55:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1960/12318 [3:23:28<17:55:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1960/12318 [3:23:28<17:55:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1961/12318 [3:23:32<17:55:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1961/12318 [3:23:32<17:55:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1962/12318 [3:23:38<17:54:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1962/12318 [3:23:38<17:54:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1963/12318 [3:23:39<17:54:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1963/12318 [3:23:39<17:54:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1964/12318 [3:23:45<17:54:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1964/12318 [3:23:45<17:54:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1965/12318 [3:23:47<17:53:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1965/12318 [3:23:47<17:53:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1966/12318 [3:23:54<17:53:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1966/12318 [3:23:54<17:53:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1967/12318 [3:24:02<17:53:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1967/12318 [3:24:02<17:53:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1968/12318 [3:24:06<17:53:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1968/12318 [3:24:06<17:53:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1969/12318 [3:24:08<17:53:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1969/12318 [3:24:08<17:53:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1970/12318 [3:24:10<17:52:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1970/12318 [3:24:10<17:52:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1971/12318 [3:24:14<17:52:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1971/12318 [3:24:14<17:52:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1972/12318 [3:24:16<17:51:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1972/12318 [3:24:16<17:51:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1973/12318 [3:24:18<17:51:12,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1973/12318 [3:24:18<17:51:12,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1974/12318 [3:24:21<17:50:49,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1974/12318 [3:24:21<17:50:49,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1975/12318 [3:24:26<17:50:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1975/12318 [3:24:26<17:50:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1976/12318 [3:24:27<17:50:05,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1976/12318 [3:24:27<17:50:05,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1977/12318 [3:24:36<17:50:12,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1977/12318 [3:24:36<17:50:12,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1978/12318 [3:24:37<17:49:41,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1978/12318 [3:24:37<17:49:41,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1979/12318 [3:24:46<17:49:48,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1979/12318 [3:24:46<17:49:48,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1980/12318 [3:24:51<17:49:38,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1980/12318 [3:24:51<17:49:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1981/12318 [3:24:55<17:49:21,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1981/12318 [3:24:55<17:49:21,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1982/12318 [3:25:00<17:49:08,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1982/12318 [3:25:00<17:49:08,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1983/12318 [3:25:08<17:49:10,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1983/12318 [3:25:08<17:49:10,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1984/12318 [3:26:18<17:54:35,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1984/12318 [3:26:18<17:54:35,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1985/12318 [3:26:20<17:54:05,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1985/12318 [3:26:20<17:54:05,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1986/12318 [3:26:27<17:54:06,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1986/12318 [3:26:27<17:54:06,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1987/12318 [3:26:31<17:53:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1987/12318 [3:26:31<17:53:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1988/12318 [3:26:34<17:53:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1988/12318 [3:26:34<17:53:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1989/12318 [3:26:36<17:52:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1989/12318 [3:26:36<17:52:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1990/12318 [3:26:45<17:53:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1990/12318 [3:26:45<17:53:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1991/12318 [3:26:51<17:52:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1991/12318 [3:26:51<17:52:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1992/12318 [3:26:56<17:52:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1992/12318 [3:26:56<17:52:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1993/12318 [3:27:05<17:52:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1993/12318 [3:27:05<17:52:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1994/12318 [3:27:10<17:52:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1994/12318 [3:27:10<17:52:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1995/12318 [3:27:15<17:52:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1995/12318 [3:27:15<17:52:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1996/12318 [3:27:21<17:52:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1996/12318 [3:27:21<17:52:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1997/12318 [3:27:23<17:51:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1997/12318 [3:27:23<17:51:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1998/12318 [3:27:27<17:51:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1998/12318 [3:27:27<17:51:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 1999/12318 [3:27:32<17:51:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 1999/12318 [3:27:32<17:51:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2000/12318 [3:27:40<17:51:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2000/12318 [3:27:40<17:51:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2001/12318 [3:27:44<17:51:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2001/12318 [3:27:44<17:51:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2002/12318 [3:27:51<17:51:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2002/12318 [3:27:51<17:51:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2003/12318 [3:27:57<17:50:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2003/12318 [3:27:57<17:50:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2004/12318 [3:28:05<17:50:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2004/12318 [3:28:05<17:50:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2005/12318 [3:28:07<17:50:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2005/12318 [3:28:07<17:50:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2006/12318 [3:28:13<17:50:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2006/12318 [3:28:13<17:50:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2007/12318 [3:28:16<17:50:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2007/12318 [3:28:16<17:50:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2008/12318 [3:28:22<17:49:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2008/12318 [3:28:22<17:49:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2009/12318 [3:28:28<17:49:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2009/12318 [3:28:28<17:49:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2010/12318 [3:28:33<17:49:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2010/12318 [3:28:33<17:49:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2011/12318 [3:28:41<17:49:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2011/12318 [3:28:41<17:49:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2012/12318 [3:28:47<17:49:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2012/12318 [3:28:47<17:49:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2013/12318 [3:28:53<17:49:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2013/12318 [3:28:53<17:49:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2014/12318 [3:28:58<17:49:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2014/12318 [3:28:58<17:49:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|��| 2015/12318 [3:29:02<17:48:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2015/12318 [3:29:02<17:48:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2016/12318 [3:29:26<17:50:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2016/12318 [3:29:26<17:50:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2017/12318 [3:29:29<17:49:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2017/12318 [3:29:29<17:49:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2018/12318 [3:29:38<17:50:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2018/12318 [3:29:38<17:50:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2019/12318 [3:29:46<17:50:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2019/12318 [3:29:46<17:50:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2020/12318 [3:29:48<17:49:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2020/12318 [3:29:48<17:49:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2021/12318 [3:29:52<17:49:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2021/12318 [3:29:52<17:49:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2022/12318 [3:30:00<17:49:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2022/12318 [3:30:00<17:49:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2023/12318 [3:30:06<17:49:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2023/12318 [3:30:06<17:49:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2024/12318 [3:30:13<17:49:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2024/12318 [3:30:13<17:49:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2025/12318 [3:30:17<17:48:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2025/12318 [3:30:17<17:48:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2026/12318 [3:30:20<17:48:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2026/12318 [3:30:20<17:48:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2027/12318 [3:30:24<17:48:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2027/12318 [3:30:24<17:48:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2028/12318 [3:30:32<17:48:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2028/12318 [3:30:32<17:48:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2029/12318 [3:30:40<17:48:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2029/12318 [3:30:40<17:48:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2030/12318 [3:30:46<17:48:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2030/12318 [3:30:46<17:48:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2031/12318 [3:30:52<17:48:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2031/12318 [3:30:52<17:48:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  16%|▏| 2032/12318 [3:30:57<17:47:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  16%|▏| 2032/12318 [3:30:57<17:47:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2033/12318 [3:31:06<17:48:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2033/12318 [3:31:06<17:48:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2034/12318 [3:31:08<17:47:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2034/12318 [3:31:08<17:47:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2035/12318 [3:31:15<17:47:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2035/12318 [3:31:15<17:47:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2036/12318 [3:31:21<17:47:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2036/12318 [3:31:21<17:47:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2037/12318 [3:31:24<17:47:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2037/12318 [3:31:24<17:47:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2038/12318 [3:31:30<17:46:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2038/12318 [3:31:30<17:46:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2039/12318 [3:31:35<17:46:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2039/12318 [3:31:35<17:46:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2040/12318 [3:31:41<17:46:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2040/12318 [3:31:41<17:46:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2041/12318 [3:31:44<17:46:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2041/12318 [3:31:44<17:46:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2042/12318 [3:31:47<17:45:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2042/12318 [3:31:47<17:45:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2043/12318 [3:31:51<17:45:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2043/12318 [3:31:51<17:45:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2044/12318 [3:31:56<17:45:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2044/12318 [3:31:56<17:45:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2045/12318 [3:32:03<17:45:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2045/12318 [3:32:03<17:45:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2046/12318 [3:32:12<17:45:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2046/12318 [3:32:12<17:45:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2047/12318 [3:32:21<17:45:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2047/12318 [3:32:21<17:45:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2048/12318 [3:32:42<17:46:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2048/12318 [3:32:42<17:46:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2049/12318 [3:32:51<17:46:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2049/12318 [3:32:51<17:46:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2050/12318 [3:32:55<17:46:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2050/12318 [3:32:55<17:46:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2051/12318 [3:33:02<17:46:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2051/12318 [3:33:02<17:46:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2052/12318 [3:33:04<17:45:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2052/12318 [3:33:04<17:45:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2053/12318 [3:33:08<17:45:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2053/12318 [3:33:08<17:45:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2054/12318 [3:33:13<17:45:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2054/12318 [3:33:13<17:45:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2055/12318 [3:33:21<17:45:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2055/12318 [3:33:21<17:45:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2056/12318 [3:33:29<17:45:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2056/12318 [3:33:29<17:45:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2057/12318 [3:33:33<17:45:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2057/12318 [3:33:33<17:45:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2058/12318 [3:33:39<17:45:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2058/12318 [3:33:39<17:45:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2059/12318 [3:33:41<17:44:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2059/12318 [3:33:41<17:44:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2060/12318 [3:33:48<17:44:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2060/12318 [3:33:48<17:44:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2061/12318 [3:33:52<17:44:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2061/12318 [3:33:52<17:44:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2062/12318 [3:34:00<17:44:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2062/12318 [3:34:00<17:44:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2063/12318 [3:34:07<17:44:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2063/12318 [3:34:07<17:44:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2064/12318 [3:34:10<17:43:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2064/12318 [3:34:10<17:43:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2065/12318 [3:34:18<17:44:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2065/12318 [3:34:18<17:44:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2066/12318 [3:34:21<17:43:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2066/12318 [3:34:21<17:43:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2067/12318 [3:34:30<17:43:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2067/12318 [3:34:30<17:43:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2068/12318 [3:34:32<17:43:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2068/12318 [3:34:32<17:43:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2069/12318 [3:34:39<17:43:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2069/12318 [3:34:39<17:43:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2070/12318 [3:34:48<17:43:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2070/12318 [3:34:48<17:43:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2071/12318 [3:34:56<17:43:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2071/12318 [3:34:56<17:43:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2072/12318 [3:35:00<17:43:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2072/12318 [3:35:00<17:43:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2073/12318 [3:35:08<17:43:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2073/12318 [3:35:08<17:43:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2074/12318 [3:35:12<17:43:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2074/12318 [3:35:12<17:43:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2075/12318 [3:35:17<17:42:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2075/12318 [3:35:17<17:42:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2076/12318 [3:35:26<17:42:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2076/12318 [3:35:26<17:42:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2077/12318 [3:35:32<17:42:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2077/12318 [3:35:32<17:42:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2078/12318 [3:35:35<17:42:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2078/12318 [3:35:35<17:42:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2079/12318 [3:35:40<17:42:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2079/12318 [3:35:40<17:42:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2080/12318 [3:35:56<17:42:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2080/12318 [3:35:56<17:42:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2081/12318 [3:35:57<17:42:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2081/12318 [3:35:57<17:42:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2082/12318 [3:36:01<17:42:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2082/12318 [3:36:01<17:42:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2083/12318 [3:36:10<17:42:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2083/12318 [3:36:10<17:42:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2084/12318 [3:36:16<17:42:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2084/12318 [3:36:16<17:42:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2085/12318 [3:36:18<17:41:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2085/12318 [3:36:18<17:41:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2086/12318 [3:36:20<17:41:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2086/12318 [3:36:20<17:41:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2087/12318 [3:36:25<17:40:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2087/12318 [3:36:25<17:40:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2088/12318 [3:36:32<17:40:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2088/12318 [3:36:32<17:40:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2089/12318 [3:36:37<17:40:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2089/12318 [3:36:37<17:40:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2090/12318 [3:36:42<17:40:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2090/12318 [3:36:42<17:40:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2091/12318 [3:36:44<17:40:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2091/12318 [3:36:44<17:40:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2092/12318 [3:36:53<17:40:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2092/12318 [3:36:53<17:40:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2093/12318 [3:37:00<17:40:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2093/12318 [3:37:00<17:40:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2094/12318 [3:37:05<17:39:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2094/12318 [3:37:05<17:39:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2095/12318 [3:37:12<17:39:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2095/12318 [3:37:12<17:39:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2096/12318 [3:37:14<17:39:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2096/12318 [3:37:14<17:39:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2097/12318 [3:37:21<17:39:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2097/12318 [3:37:21<17:39:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2098/12318 [3:37:24<17:39:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2098/12318 [3:37:24<17:39:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2099/12318 [3:37:29<17:38:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2099/12318 [3:37:29<17:38:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2100/12318 [3:37:38<17:38:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2100/12318 [3:37:38<17:38:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2101/12318 [3:37:46<17:38:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2101/12318 [3:37:46<17:38:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2102/12318 [3:37:51<17:38:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2102/12318 [3:37:51<17:38:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2103/12318 [3:37:59<17:38:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2103/12318 [3:37:59<17:38:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2104/12318 [3:38:01<17:38:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2104/12318 [3:38:01<17:38:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2105/12318 [3:38:09<17:38:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2105/12318 [3:38:09<17:38:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2106/12318 [3:38:10<17:37:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2106/12318 [3:38:10<17:37:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2107/12318 [3:38:14<17:37:38,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2107/12318 [3:38:14<17:37:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2108/12318 [3:38:15<17:37:08,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2108/12318 [3:38:15<17:37:08,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2109/12318 [3:38:18<17:36:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2109/12318 [3:38:18<17:36:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2110/12318 [3:38:27<17:36:52,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2110/12318 [3:38:27<17:36:52,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2111/12318 [3:38:29<17:36:25,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2111/12318 [3:38:29<17:36:25,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2112/12318 [3:39:09<17:39:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2112/12318 [3:39:09<17:39:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2113/12318 [3:39:18<17:39:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2113/12318 [3:39:18<17:39:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2114/12318 [3:39:23<17:38:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2114/12318 [3:39:23<17:38:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2115/12318 [3:39:32<17:39:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2115/12318 [3:39:32<17:39:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2116/12318 [3:39:38<17:38:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2116/12318 [3:39:38<17:38:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2117/12318 [3:39:43<17:38:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2117/12318 [3:39:43<17:38:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2118/12318 [3:39:51<17:38:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2118/12318 [3:39:51<17:38:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2119/12318 [3:39:57<17:38:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2119/12318 [3:39:57<17:38:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2120/12318 [3:40:05<17:38:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2120/12318 [3:40:05<17:38:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2121/12318 [3:40:11<17:38:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2121/12318 [3:40:11<17:38:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2122/12318 [3:40:15<17:38:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2122/12318 [3:40:15<17:38:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2123/12318 [3:40:23<17:38:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2123/12318 [3:40:23<17:38:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2124/12318 [3:40:31<17:38:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2124/12318 [3:40:31<17:38:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2125/12318 [3:40:37<17:38:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2125/12318 [3:40:37<17:38:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2126/12318 [3:40:40<17:37:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2126/12318 [3:40:40<17:37:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2127/12318 [3:40:46<17:37:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2127/12318 [3:40:46<17:37:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2128/12318 [3:40:53<17:37:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2128/12318 [3:40:53<17:37:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2129/12318 [3:41:00<17:37:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2129/12318 [3:41:00<17:37:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2130/12318 [3:41:07<17:37:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2130/12318 [3:41:07<17:37:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2131/12318 [3:41:10<17:37:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2131/12318 [3:41:10<17:37:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2132/12318 [3:41:15<17:37:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2132/12318 [3:41:15<17:37:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2133/12318 [3:41:18<17:36:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2133/12318 [3:41:18<17:36:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2134/12318 [3:41:22<17:36:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2134/12318 [3:41:22<17:36:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2135/12318 [3:41:28<17:36:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2135/12318 [3:41:28<17:36:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2136/12318 [3:41:34<17:36:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2136/12318 [3:41:34<17:36:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2137/12318 [3:41:37<17:35:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2137/12318 [3:41:37<17:35:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2138/12318 [3:41:43<17:35:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2138/12318 [3:41:43<17:35:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2139/12318 [3:41:46<17:35:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2139/12318 [3:41:46<17:35:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2140/12318 [3:41:53<17:35:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2140/12318 [3:41:53<17:35:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2141/12318 [3:42:00<17:35:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2141/12318 [3:42:00<17:35:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2142/12318 [3:42:06<17:35:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2142/12318 [3:42:06<17:35:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2143/12318 [3:42:14<17:35:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2143/12318 [3:42:14<17:35:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2144/12318 [3:42:29<17:35:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2144/12318 [3:42:29<17:35:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2145/12318 [3:42:37<17:35:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2145/12318 [3:42:37<17:35:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2146/12318 [3:42:44<17:35:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2146/12318 [3:42:44<17:35:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2147/12318 [3:42:46<17:35:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2147/12318 [3:42:46<17:35:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2148/12318 [3:42:54<17:35:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2148/12318 [3:42:54<17:35:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2149/12318 [3:43:01<17:35:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2149/12318 [3:43:01<17:35:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2150/12318 [3:43:05<17:35:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2150/12318 [3:43:05<17:35:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2151/12318 [3:43:12<17:35:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2151/12318 [3:43:12<17:35:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2152/12318 [3:43:15<17:34:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2152/12318 [3:43:15<17:34:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2153/12318 [3:43:19<17:34:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2153/12318 [3:43:19<17:34:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2154/12318 [3:43:27<17:34:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2154/12318 [3:43:27<17:34:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  17%|▏| 2155/12318 [3:43:32<17:34:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  17%|▏| 2155/12318 [3:43:32<17:34:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2156/12318 [3:43:41<17:34:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2156/12318 [3:43:41<17:34:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2157/12318 [3:43:46<17:34:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2157/12318 [3:43:46<17:34:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2158/12318 [3:43:50<17:33:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2158/12318 [3:43:50<17:33:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2159/12318 [3:43:59<17:33:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2159/12318 [3:43:59<17:33:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2160/12318 [3:44:04<17:33:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2160/12318 [3:44:04<17:33:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2161/12318 [3:44:11<17:33:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2161/12318 [3:44:11<17:33:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2162/12318 [3:44:14<17:33:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2162/12318 [3:44:14<17:33:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2163/12318 [3:44:19<17:33:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2163/12318 [3:44:19<17:33:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2164/12318 [3:44:22<17:32:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2164/12318 [3:44:22<17:32:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2165/12318 [3:44:29<17:32:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2165/12318 [3:44:29<17:32:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2166/12318 [3:44:31<17:32:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2166/12318 [3:44:31<17:32:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2167/12318 [3:44:36<17:32:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2167/12318 [3:44:36<17:32:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2168/12318 [3:44:43<17:32:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2168/12318 [3:44:43<17:32:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2169/12318 [3:44:47<17:31:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2169/12318 [3:44:47<17:31:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2170/12318 [3:44:55<17:31:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2170/12318 [3:44:55<17:31:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2171/12318 [3:44:59<17:31:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2171/12318 [3:44:59<17:31:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2172/12318 [3:45:08<17:31:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2172/12318 [3:45:08<17:31:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2173/12318 [3:45:09<17:31:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2173/12318 [3:45:09<17:31:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2174/12318 [3:45:12<17:30:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2174/12318 [3:45:12<17:30:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2175/12318 [3:45:17<17:30:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2175/12318 [3:45:17<17:30:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2176/12318 [3:45:45<17:32:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2176/12318 [3:45:45<17:32:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2177/12318 [3:45:49<17:31:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2177/12318 [3:45:49<17:31:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2178/12318 [3:45:54<17:31:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2178/12318 [3:45:54<17:31:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2179/12318 [3:46:00<17:31:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2179/12318 [3:46:00<17:31:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2180/12318 [3:46:06<17:31:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2180/12318 [3:46:06<17:31:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2181/12318 [3:46:11<17:31:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2181/12318 [3:46:11<17:31:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2182/12318 [3:46:20<17:31:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2182/12318 [3:46:20<17:31:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2183/12318 [3:46:27<17:31:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2183/12318 [3:46:27<17:31:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2184/12318 [3:46:31<17:31:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2184/12318 [3:46:31<17:31:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2185/12318 [3:46:32<17:30:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2185/12318 [3:46:32<17:30:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2186/12318 [3:46:37<17:30:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2186/12318 [3:46:37<17:30:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2187/12318 [3:46:46<17:30:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2187/12318 [3:46:46<17:30:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2188/12318 [3:46:50<17:30:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2188/12318 [3:46:50<17:30:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2189/12318 [3:46:54<17:29:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2189/12318 [3:46:54<17:29:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2190/12318 [3:46:59<17:29:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2190/12318 [3:46:59<17:29:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2191/12318 [3:47:01<17:29:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2191/12318 [3:47:01<17:29:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2192/12318 [3:47:05<17:29:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2192/12318 [3:47:05<17:29:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2193/12318 [3:47:10<17:28:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2193/12318 [3:47:10<17:28:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2194/12318 [3:47:12<17:28:24,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2194/12318 [3:47:12<17:28:24,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2195/12318 [3:47:13<17:27:55,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2195/12318 [3:47:13<17:27:55,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2196/12318 [3:47:21<17:27:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2196/12318 [3:47:21<17:27:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2197/12318 [3:47:29<17:28:01,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2197/12318 [3:47:29<17:28:01,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2198/12318 [3:47:36<17:27:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2198/12318 [3:47:36<17:27:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2199/12318 [3:47:44<17:28:01,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2199/12318 [3:47:44<17:28:01,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2200/12318 [3:47:51<17:27:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2200/12318 [3:47:51<17:27:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2201/12318 [3:47:55<17:27:42,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2201/12318 [3:47:55<17:27:42,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2202/12318 [3:48:03<17:27:42,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2202/12318 [3:48:03<17:27:42,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2203/12318 [3:48:12<17:27:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2203/12318 [3:48:12<17:27:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2204/12318 [3:48:19<17:27:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2204/12318 [3:48:19<17:27:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2205/12318 [3:48:28<17:27:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2205/12318 [3:48:28<17:27:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2206/12318 [3:48:32<17:27:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2206/12318 [3:48:32<17:27:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2207/12318 [3:48:36<17:27:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2207/12318 [3:48:36<17:27:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2208/12318 [3:49:03<17:28:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2208/12318 [3:49:03<17:28:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2209/12318 [3:49:10<17:28:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2209/12318 [3:49:10<17:28:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2210/12318 [3:49:17<17:28:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2210/12318 [3:49:17<17:28:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2211/12318 [3:49:22<17:28:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2211/12318 [3:49:22<17:28:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2212/12318 [3:49:28<17:28:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2212/12318 [3:49:28<17:28:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2213/12318 [3:49:31<17:28:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2213/12318 [3:49:31<17:28:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2214/12318 [3:49:36<17:27:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2214/12318 [3:49:36<17:27:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2215/12318 [3:49:42<17:27:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2215/12318 [3:49:42<17:27:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2216/12318 [3:49:51<17:27:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2216/12318 [3:49:51<17:27:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2217/12318 [3:49:56<17:27:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2217/12318 [3:49:56<17:27:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2218/12318 [3:50:04<17:27:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2218/12318 [3:50:04<17:27:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2219/12318 [3:50:12<17:27:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2219/12318 [3:50:12<17:27:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2220/12318 [3:50:17<17:27:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2220/12318 [3:50:17<17:27:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2221/12318 [3:50:23<17:27:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2221/12318 [3:50:23<17:27:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2222/12318 [3:50:27<17:27:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2222/12318 [3:50:27<17:27:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2223/12318 [3:50:32<17:26:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2223/12318 [3:50:32<17:26:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2224/12318 [3:50:36<17:26:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2224/12318 [3:50:36<17:26:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2225/12318 [3:50:44<17:26:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2225/12318 [3:50:44<17:26:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2226/12318 [3:50:47<17:26:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2226/12318 [3:50:47<17:26:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2227/12318 [3:50:55<17:26:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2227/12318 [3:50:55<17:26:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2228/12318 [3:51:02<17:26:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2228/12318 [3:51:02<17:26:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2229/12318 [3:51:08<17:26:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2229/12318 [3:51:08<17:26:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2230/12318 [3:51:09<17:25:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2230/12318 [3:51:09<17:25:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2231/12318 [3:51:11<17:25:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2231/12318 [3:51:11<17:25:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2232/12318 [3:51:20<17:25:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2232/12318 [3:51:20<17:25:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2233/12318 [3:51:23<17:25:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2233/12318 [3:51:23<17:25:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2234/12318 [3:51:26<17:24:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2234/12318 [3:51:26<17:24:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2235/12318 [3:51:35<17:24:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2235/12318 [3:51:35<17:24:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2236/12318 [3:51:40<17:24:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2236/12318 [3:51:40<17:24:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2237/12318 [3:51:48<17:24:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2237/12318 [3:51:48<17:24:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2238/12318 [3:51:54<17:24:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2238/12318 [3:51:54<17:24:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2239/12318 [3:52:01<17:24:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2239/12318 [3:52:01<17:24:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2240/12318 [3:52:14<17:24:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2240/12318 [3:52:14<17:24:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2241/12318 [3:52:19<17:24:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2241/12318 [3:52:19<17:24:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2242/12318 [3:52:25<17:24:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2242/12318 [3:52:25<17:24:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2243/12318 [3:52:26<17:24:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2243/12318 [3:52:26<17:24:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2244/12318 [3:52:27<17:23:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2244/12318 [3:52:27<17:23:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2245/12318 [3:52:36<17:23:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2245/12318 [3:52:36<17:23:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2246/12318 [3:52:41<17:23:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2246/12318 [3:52:41<17:23:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2247/12318 [3:52:46<17:23:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2247/12318 [3:52:46<17:23:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2248/12318 [3:52:51<17:23:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2248/12318 [3:52:51<17:23:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2249/12318 [3:53:00<17:23:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2249/12318 [3:53:00<17:23:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2250/12318 [3:53:06<17:23:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2250/12318 [3:53:06<17:23:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2251/12318 [3:53:15<17:23:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2251/12318 [3:53:15<17:23:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2252/12318 [3:53:20<17:23:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2252/12318 [3:53:20<17:23:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2253/12318 [3:53:23<17:22:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2253/12318 [3:53:23<17:22:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2254/12318 [3:53:26<17:22:16,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2254/12318 [3:53:26<17:22:16,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2255/12318 [3:53:31<17:22:05,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2255/12318 [3:53:31<17:22:05,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2256/12318 [3:53:36<17:21:56,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2256/12318 [3:53:36<17:21:56,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2257/12318 [3:53:42<17:21:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2257/12318 [3:53:42<17:21:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2258/12318 [3:53:50<17:21:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2258/12318 [3:53:50<17:21:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2259/12318 [3:53:55<17:21:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2259/12318 [3:53:55<17:21:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2260/12318 [3:54:02<17:21:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2260/12318 [3:54:02<17:21:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2261/12318 [3:54:07<17:21:22,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2261/12318 [3:54:07<17:21:22,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2262/12318 [3:54:14<17:21:22,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2262/12318 [3:54:14<17:21:22,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2263/12318 [3:54:21<17:21:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2263/12318 [3:54:21<17:21:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2264/12318 [3:54:24<17:20:59,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2264/12318 [3:54:24<17:20:59,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2265/12318 [3:54:30<17:20:49,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2265/12318 [3:54:30<17:20:49,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2266/12318 [3:54:35<17:20:37,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2266/12318 [3:54:35<17:20:37,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2267/12318 [3:54:43<17:20:42,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2267/12318 [3:54:43<17:20:42,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2268/12318 [3:54:50<17:20:37,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2268/12318 [3:54:50<17:20:37,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2269/12318 [3:54:58<17:20:38,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2269/12318 [3:54:58<17:20:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2270/12318 [3:55:02<17:20:21,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2270/12318 [3:55:02<17:20:21,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2271/12318 [3:55:10<17:20:26,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2271/12318 [3:55:10<17:20:26,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2272/12318 [3:55:30<17:21:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2272/12318 [3:55:30<17:21:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2273/12318 [3:55:38<17:21:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2273/12318 [3:55:38<17:21:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2274/12318 [3:55:45<17:21:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2274/12318 [3:55:45<17:21:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2275/12318 [3:55:49<17:21:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2275/12318 [3:55:49<17:21:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2276/12318 [3:55:54<17:20:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2276/12318 [3:55:54<17:20:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2277/12318 [3:56:02<17:20:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2277/12318 [3:56:02<17:20:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  18%|▏| 2278/12318 [3:56:04<17:20:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  18%|▏| 2278/12318 [3:56:04<17:20:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2279/12318 [3:56:11<17:20:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2279/12318 [3:56:11<17:20:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2280/12318 [3:56:18<17:20:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2280/12318 [3:56:18<17:20:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2281/12318 [3:56:26<17:20:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2281/12318 [3:56:26<17:20:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2282/12318 [3:56:32<17:20:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2282/12318 [3:56:32<17:20:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2283/12318 [3:56:37<17:20:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2283/12318 [3:56:37<17:20:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2284/12318 [3:56:46<17:20:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2284/12318 [3:56:46<17:20:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2285/12318 [3:56:52<17:20:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2285/12318 [3:56:52<17:20:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2286/12318 [3:57:00<17:20:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2286/12318 [3:57:00<17:20:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2287/12318 [3:57:05<17:19:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2287/12318 [3:57:05<17:19:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2288/12318 [3:57:10<17:19:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2288/12318 [3:57:10<17:19:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2289/12318 [3:57:13<17:19:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2289/12318 [3:57:13<17:19:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2290/12318 [3:57:19<17:19:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2290/12318 [3:57:19<17:19:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2291/12318 [3:57:28<17:19:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2291/12318 [3:57:28<17:19:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2292/12318 [3:57:31<17:19:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2292/12318 [3:57:31<17:19:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2293/12318 [3:57:40<17:19:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2293/12318 [3:57:40<17:19:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2294/12318 [3:57:42<17:18:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2294/12318 [3:57:42<17:18:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2295/12318 [3:57:50<17:18:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2295/12318 [3:57:50<17:18:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2296/12318 [3:57:58<17:18:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2296/12318 [3:57:58<17:18:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2297/12318 [3:58:02<17:18:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2297/12318 [3:58:02<17:18:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2298/12318 [3:58:06<17:18:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2298/12318 [3:58:06<17:18:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2299/12318 [3:58:13<17:18:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2299/12318 [3:58:13<17:18:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2300/12318 [3:58:20<17:18:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2300/12318 [3:58:20<17:18:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2301/12318 [3:58:26<17:18:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2301/12318 [3:58:26<17:18:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2302/12318 [3:58:35<17:18:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2302/12318 [3:58:35<17:18:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2303/12318 [3:58:41<17:18:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2303/12318 [3:58:41<17:18:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2304/12318 [3:58:57<17:18:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2304/12318 [3:58:57<17:18:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2305/12318 [3:58:59<17:18:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2305/12318 [3:58:59<17:18:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2306/12318 [3:59:02<17:17:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2306/12318 [3:59:02<17:17:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2307/12318 [3:59:07<17:17:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2307/12318 [3:59:07<17:17:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2308/12318 [3:59:14<17:17:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2308/12318 [3:59:14<17:17:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2309/12318 [3:59:18<17:17:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2309/12318 [3:59:18<17:17:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2310/12318 [3:59:23<17:17:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2310/12318 [3:59:23<17:17:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2311/12318 [3:59:29<17:17:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2311/12318 [3:59:29<17:17:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2312/12318 [3:59:31<17:16:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2312/12318 [3:59:31<17:16:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2313/12318 [3:59:36<17:16:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2313/12318 [3:59:36<17:16:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2314/12318 [3:59:41<17:16:14,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2314/12318 [3:59:41<17:16:14,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2315/12318 [3:59:48<17:16:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2315/12318 [3:59:48<17:16:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2316/12318 [3:59:49<17:15:45,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2316/12318 [3:59:49<17:15:45,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2317/12318 [3:59:55<17:15:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2317/12318 [3:59:55<17:15:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2318/12318 [4:00:02<17:15:31,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2318/12318 [4:00:02<17:15:31,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2319/12318 [4:00:09<17:15:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2319/12318 [4:00:09<17:15:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2320/12318 [4:00:15<17:15:23,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2320/12318 [4:00:15<17:15:23,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2321/12318 [4:00:24<17:15:27,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2321/12318 [4:00:24<17:15:27,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2322/12318 [4:00:29<17:15:18,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2322/12318 [4:00:29<17:15:18,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2323/12318 [4:00:35<17:15:10,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2323/12318 [4:00:35<17:15:10,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2324/12318 [4:00:40<17:15:00,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2324/12318 [4:00:40<17:15:00,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2325/12318 [4:00:44<17:14:42,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2325/12318 [4:00:44<17:14:42,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2326/12318 [4:00:46<17:14:20,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2326/12318 [4:00:46<17:14:20,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2327/12318 [4:00:50<17:14:01,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2327/12318 [4:00:50<17:14:01,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2328/12318 [4:00:52<17:13:39,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2328/12318 [4:00:52<17:13:39,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2329/12318 [4:00:54<17:13:14,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2329/12318 [4:00:54<17:13:14,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2330/12318 [4:01:01<17:13:14,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2330/12318 [4:01:01<17:13:14,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2331/12318 [4:01:08<17:13:09,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2331/12318 [4:01:08<17:13:09,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2332/12318 [4:01:11<17:12:48,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2332/12318 [4:01:11<17:12:48,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2333/12318 [4:01:13<17:12:24,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2333/12318 [4:01:13<17:12:24,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2334/12318 [4:01:21<17:12:28,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2334/12318 [4:01:21<17:12:28,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2335/12318 [4:01:23<17:12:01,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2335/12318 [4:01:23<17:12:01,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2336/12318 [4:02:14<17:15:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2336/12318 [4:02:14<17:15:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2337/12318 [4:02:19<17:14:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2337/12318 [4:02:19<17:14:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2338/12318 [4:02:27<17:14:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2338/12318 [4:02:27<17:14:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2339/12318 [4:02:35<17:14:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2339/12318 [4:02:35<17:14:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2340/12318 [4:02:44<17:15:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2340/12318 [4:02:44<17:15:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2341/12318 [4:02:49<17:14:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2341/12318 [4:02:49<17:14:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2342/12318 [4:02:50<17:14:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2342/12318 [4:02:50<17:14:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2343/12318 [4:02:57<17:14:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2343/12318 [4:02:57<17:14:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2344/12318 [4:03:00<17:14:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2344/12318 [4:03:00<17:14:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2345/12318 [4:03:02<17:13:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2345/12318 [4:03:02<17:13:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2346/12318 [4:03:08<17:13:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2346/12318 [4:03:09<17:13:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2347/12318 [4:03:13<17:13:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2347/12318 [4:03:13<17:13:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2348/12318 [4:03:22<17:13:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2348/12318 [4:03:22<17:13:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2349/12318 [4:03:26<17:13:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2349/12318 [4:03:26<17:13:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2350/12318 [4:03:30<17:12:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2350/12318 [4:03:30<17:12:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2351/12318 [4:03:32<17:12:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2351/12318 [4:03:32<17:12:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2352/12318 [4:03:41<17:12:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2352/12318 [4:03:41<17:12:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2353/12318 [4:03:43<17:12:09,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2353/12318 [4:03:43<17:12:09,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2354/12318 [4:03:48<17:12:00,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2354/12318 [4:03:48<17:12:00,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2355/12318 [4:03:51<17:11:40,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2355/12318 [4:03:51<17:11:40,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2356/12318 [4:03:55<17:11:24,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2356/12318 [4:03:55<17:11:24,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2357/12318 [4:03:59<17:11:06,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2357/12318 [4:03:59<17:11:06,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2358/12318 [4:04:01<17:10:46,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2358/12318 [4:04:01<17:10:46,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2359/12318 [4:04:05<17:10:30,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2359/12318 [4:04:05<17:10:30,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2360/12318 [4:04:09<17:10:14,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2360/12318 [4:04:09<17:10:14,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2361/12318 [4:04:11<17:09:50,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2361/12318 [4:04:11<17:09:50,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2362/12318 [4:04:16<17:09:36,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2362/12318 [4:04:16<17:09:36,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2363/12318 [4:04:20<17:09:22,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2363/12318 [4:04:20<17:09:22,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2364/12318 [4:04:26<17:09:13,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2364/12318 [4:04:26<17:09:13,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2365/12318 [4:04:27<17:08:47,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2365/12318 [4:04:27<17:08:47,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2366/12318 [4:04:34<17:08:42,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2366/12318 [4:04:34<17:08:42,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2367/12318 [4:04:42<17:08:46,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2367/12318 [4:04:42<17:08:46,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2368/12318 [4:05:25<17:11:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2368/12318 [4:05:25<17:11:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2369/12318 [4:05:33<17:11:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2369/12318 [4:05:33<17:11:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2370/12318 [4:05:39<17:11:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2370/12318 [4:05:39<17:11:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2371/12318 [4:05:42<17:10:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2371/12318 [4:05:42<17:10:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2372/12318 [4:05:49<17:10:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2372/12318 [4:05:49<17:10:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2373/12318 [4:05:53<17:10:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2373/12318 [4:05:53<17:10:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2374/12318 [4:06:02<17:10:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2374/12318 [4:06:02<17:10:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2375/12318 [4:06:08<17:10:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2375/12318 [4:06:08<17:10:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2376/12318 [4:06:15<17:10:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2376/12318 [4:06:15<17:10:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2377/12318 [4:06:20<17:10:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2377/12318 [4:06:20<17:10:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2378/12318 [4:06:27<17:10:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2378/12318 [4:06:27<17:10:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2379/12318 [4:06:28<17:09:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2379/12318 [4:06:28<17:09:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2380/12318 [4:06:34<17:09:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2380/12318 [4:06:34<17:09:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2381/12318 [4:06:35<17:09:10,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2381/12318 [4:06:35<17:09:10,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2382/12318 [4:06:38<17:08:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2382/12318 [4:06:38<17:08:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2383/12318 [4:06:42<17:08:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2383/12318 [4:06:42<17:08:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2384/12318 [4:06:50<17:08:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2384/12318 [4:06:50<17:08:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2385/12318 [4:06:59<17:08:39,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2385/12318 [4:06:59<17:08:39,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2386/12318 [4:07:07<17:08:39,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2386/12318 [4:07:07<17:08:39,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2387/12318 [4:07:12<17:08:30,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2387/12318 [4:07:12<17:08:30,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2388/12318 [4:07:21<17:08:33,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2388/12318 [4:07:21<17:08:33,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2389/12318 [4:07:22<17:08:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2389/12318 [4:07:22<17:08:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2390/12318 [4:07:24<17:07:45,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2390/12318 [4:07:24<17:07:45,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2391/12318 [4:07:29<17:07:31,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2391/12318 [4:07:29<17:07:31,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2392/12318 [4:07:33<17:07:18,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2392/12318 [4:07:33<17:07:18,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2393/12318 [4:07:35<17:06:51,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2393/12318 [4:07:35<17:06:51,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2394/12318 [4:07:38<17:06:33,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2394/12318 [4:07:38<17:06:33,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2395/12318 [4:07:42<17:06:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2395/12318 [4:07:42<17:06:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2396/12318 [4:07:45<17:05:57,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2396/12318 [4:07:45<17:05:57,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2397/12318 [4:07:49<17:05:44,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2397/12318 [4:07:49<17:05:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2398/12318 [4:07:54<17:05:32,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2398/12318 [4:07:54<17:05:32,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2399/12318 [4:07:57<17:05:12,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2399/12318 [4:07:57<17:05:12,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2400/12318 [4:08:38<17:07:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2400/12318 [4:08:38<17:07:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2401/12318 [4:09:08<17:09:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2401/12318 [4:09:08<17:09:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  19%|▏| 2402/12318 [4:09:09<17:08:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  19%|▏| 2402/12318 [4:09:09<17:08:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2403/12318 [4:09:15<17:08:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2403/12318 [4:09:15<17:08:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2404/12318 [4:09:22<17:08:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2404/12318 [4:09:22<17:08:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2405/12318 [4:09:25<17:08:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2405/12318 [4:09:25<17:08:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2406/12318 [4:09:30<17:07:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2406/12318 [4:09:30<17:07:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2407/12318 [4:09:35<17:07:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2407/12318 [4:09:35<17:07:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2408/12318 [4:09:43<17:07:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2408/12318 [4:09:43<17:07:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2409/12318 [4:09:47<17:07:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2409/12318 [4:09:47<17:07:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2410/12318 [4:09:51<17:07:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2410/12318 [4:09:51<17:07:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2411/12318 [4:09:56<17:07:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2411/12318 [4:09:56<17:07:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2412/12318 [4:10:00<17:06:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2412/12318 [4:10:00<17:06:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2413/12318 [4:10:09<17:06:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2413/12318 [4:10:09<17:06:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2414/12318 [4:10:13<17:06:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2414/12318 [4:10:13<17:06:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2415/12318 [4:10:22<17:06:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2415/12318 [4:10:22<17:06:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2416/12318 [4:10:27<17:06:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2416/12318 [4:10:27<17:06:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2417/12318 [4:10:32<17:06:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2417/12318 [4:10:32<17:06:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2418/12318 [4:10:37<17:06:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2418/12318 [4:10:37<17:06:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2419/12318 [4:10:44<17:06:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2419/12318 [4:10:44<17:06:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2420/12318 [4:10:48<17:05:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2420/12318 [4:10:48<17:05:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2421/12318 [4:10:57<17:05:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2421/12318 [4:10:57<17:05:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2422/12318 [4:11:03<17:05:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2422/12318 [4:11:03<17:05:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2423/12318 [4:11:06<17:05:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2423/12318 [4:11:06<17:05:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2424/12318 [4:11:11<17:05:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2424/12318 [4:11:11<17:05:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2425/12318 [4:11:16<17:05:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2425/12318 [4:11:16<17:05:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2426/12318 [4:11:20<17:04:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2426/12318 [4:11:20<17:04:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2427/12318 [4:11:24<17:04:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2427/12318 [4:11:24<17:04:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2428/12318 [4:11:25<17:04:09,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2428/12318 [4:11:25<17:04:09,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2429/12318 [4:11:32<17:04:04,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2429/12318 [4:11:32<17:04:04,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2430/12318 [4:11:39<17:04:00,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2430/12318 [4:11:39<17:04:00,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2431/12318 [4:11:44<17:03:48,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2431/12318 [4:11:44<17:03:48,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2432/12318 [4:12:16<17:05:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2432/12318 [4:12:16<17:05:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2433/12318 [4:12:24<17:05:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2433/12318 [4:12:24<17:05:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2434/12318 [4:12:27<17:05:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2434/12318 [4:12:27<17:05:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2435/12318 [4:12:33<17:05:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2435/12318 [4:12:33<17:05:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2436/12318 [4:12:42<17:05:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2436/12318 [4:12:42<17:05:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2437/12318 [4:12:51<17:05:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2437/12318 [4:12:51<17:05:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2438/12318 [4:12:58<17:05:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2438/12318 [4:12:58<17:05:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2439/12318 [4:13:07<17:05:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2439/12318 [4:13:07<17:05:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2440/12318 [4:13:14<17:05:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2440/12318 [4:13:14<17:05:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2441/12318 [4:13:21<17:05:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2441/12318 [4:13:21<17:05:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2442/12318 [4:13:30<17:05:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2442/12318 [4:13:30<17:05:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2443/12318 [4:13:38<17:05:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2443/12318 [4:13:38<17:05:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2444/12318 [4:13:42<17:05:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2444/12318 [4:13:42<17:05:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2445/12318 [4:13:44<17:04:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2445/12318 [4:13:44<17:04:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2446/12318 [4:13:47<17:04:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2446/12318 [4:13:47<17:04:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2447/12318 [4:13:55<17:04:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2447/12318 [4:13:55<17:04:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2448/12318 [4:13:56<17:03:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2448/12318 [4:13:56<17:03:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2449/12318 [4:13:59<17:03:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2449/12318 [4:13:59<17:03:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2450/12318 [4:14:05<17:03:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2450/12318 [4:14:05<17:03:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2451/12318 [4:14:12<17:03:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2451/12318 [4:14:12<17:03:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2452/12318 [4:14:17<17:03:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2452/12318 [4:14:17<17:03:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2453/12318 [4:14:24<17:03:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2453/12318 [4:14:24<17:03:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2454/12318 [4:14:28<17:02:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2454/12318 [4:14:28<17:02:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2455/12318 [4:14:32<17:02:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2455/12318 [4:14:32<17:02:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2456/12318 [4:14:38<17:02:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2456/12318 [4:14:38<17:02:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2457/12318 [4:14:45<17:02:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2457/12318 [4:14:45<17:02:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2458/12318 [4:14:46<17:02:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2458/12318 [4:14:46<17:02:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2459/12318 [4:14:55<17:02:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2459/12318 [4:14:55<17:02:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2460/12318 [4:15:00<17:01:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2460/12318 [4:15:00<17:01:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2461/12318 [4:15:07<17:01:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2461/12318 [4:15:07<17:01:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2462/12318 [4:15:15<17:01:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2462/12318 [4:15:15<17:01:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2463/12318 [4:15:20<17:01:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2463/12318 [4:15:20<17:01:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2464/12318 [4:15:39<17:02:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2464/12318 [4:15:39<17:02:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2465/12318 [4:15:45<17:02:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2465/12318 [4:15:45<17:02:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2466/12318 [4:15:47<17:01:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2466/12318 [4:15:47<17:01:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2467/12318 [4:15:56<17:02:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2467/12318 [4:15:56<17:02:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2468/12318 [4:16:01<17:01:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2468/12318 [4:16:01<17:01:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2469/12318 [4:16:03<17:01:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2469/12318 [4:16:03<17:01:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2470/12318 [4:16:05<17:01:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2470/12318 [4:16:05<17:01:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2471/12318 [4:16:06<17:00:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2471/12318 [4:16:06<17:00:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2472/12318 [4:16:08<17:00:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2472/12318 [4:16:08<17:00:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2473/12318 [4:16:13<17:00:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2473/12318 [4:16:13<17:00:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2474/12318 [4:16:16<16:59:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2474/12318 [4:16:16<16:59:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2475/12318 [4:16:18<16:59:21,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2475/12318 [4:16:18<16:59:21,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2476/12318 [4:16:21<16:59:00,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2476/12318 [4:16:21<16:59:00,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2477/12318 [4:16:30<16:59:03,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2477/12318 [4:16:30<16:59:03,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2478/12318 [4:16:38<16:59:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2478/12318 [4:16:38<16:59:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2479/12318 [4:16:40<16:58:42,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2479/12318 [4:16:40<16:58:42,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2480/12318 [4:16:44<16:58:28,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2480/12318 [4:16:44<16:58:28,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2481/12318 [4:16:52<16:58:28,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2481/12318 [4:16:52<16:58:28,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2482/12318 [4:16:56<16:58:12,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2482/12318 [4:16:56<16:58:12,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2483/12318 [4:17:01<16:58:03,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2483/12318 [4:17:01<16:58:03,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2484/12318 [4:17:02<16:57:38,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2484/12318 [4:17:02<16:57:38,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2485/12318 [4:17:08<16:57:29,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2485/12318 [4:17:08<16:57:29,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2486/12318 [4:17:17<16:57:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2486/12318 [4:17:17<16:57:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2487/12318 [4:17:21<16:57:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2487/12318 [4:17:21<16:57:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2488/12318 [4:17:25<16:57:05,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2488/12318 [4:17:25<16:57:05,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2489/12318 [4:17:30<16:56:51,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2489/12318 [4:17:30<16:56:51,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2490/12318 [4:17:32<16:56:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2490/12318 [4:17:32<16:56:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2491/12318 [4:17:39<16:56:27,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2491/12318 [4:17:39<16:56:27,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2492/12318 [4:17:48<16:56:30,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2492/12318 [4:17:48<16:56:30,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2493/12318 [4:17:54<16:56:25,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2493/12318 [4:17:54<16:56:25,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2494/12318 [4:17:55<16:56:00,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2494/12318 [4:17:55<16:56:00,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2495/12318 [4:18:01<16:55:51,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2495/12318 [4:18:01<16:55:51,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2496/12318 [4:19:08<16:59:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2496/12318 [4:19:08<16:59:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2497/12318 [4:19:11<16:59:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2497/12318 [4:19:11<16:59:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2498/12318 [4:19:14<16:59:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2498/12318 [4:19:14<16:59:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2499/12318 [4:19:22<16:59:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2499/12318 [4:19:22<16:59:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2500/12318 [4:19:31<16:59:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2500/12318 [4:19:31<16:59:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2501/12318 [4:19:38<16:59:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2501/12318 [4:19:38<16:59:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2502/12318 [4:19:47<16:59:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2502/12318 [4:19:47<16:59:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2503/12318 [4:19:52<16:59:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2503/12318 [4:19:52<16:59:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2504/12318 [4:19:57<16:58:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2504/12318 [4:19:57<16:58:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2505/12318 [4:20:02<16:58:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2505/12318 [4:20:02<16:58:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2506/12318 [4:20:09<16:58:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2506/12318 [4:20:09<16:58:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2507/12318 [4:20:14<16:58:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2507/12318 [4:20:14<16:58:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2508/12318 [4:20:22<16:58:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2508/12318 [4:20:22<16:58:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2509/12318 [4:20:26<16:58:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2509/12318 [4:20:26<16:58:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2510/12318 [4:20:34<16:58:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2510/12318 [4:20:34<16:58:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2511/12318 [4:20:39<16:58:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2511/12318 [4:20:39<16:58:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2512/12318 [4:20:44<16:57:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2512/12318 [4:20:44<16:57:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2513/12318 [4:20:48<16:57:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2513/12318 [4:20:48<16:57:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2514/12318 [4:20:49<16:57:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2514/12318 [4:20:49<16:57:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2515/12318 [4:20:58<16:57:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2515/12318 [4:20:58<16:57:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2516/12318 [4:21:04<16:57:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2516/12318 [4:21:04<16:57:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2517/12318 [4:21:12<16:57:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2517/12318 [4:21:12<16:57:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2518/12318 [4:21:17<16:56:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2518/12318 [4:21:17<16:56:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2519/12318 [4:21:21<16:56:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2519/12318 [4:21:21<16:56:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2520/12318 [4:21:24<16:56:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2520/12318 [4:21:24<16:56:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2521/12318 [4:21:26<16:56:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2521/12318 [4:21:26<16:56:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2522/12318 [4:21:31<16:55:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2522/12318 [4:21:31<16:55:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2523/12318 [4:21:40<16:55:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2523/12318 [4:21:40<16:55:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2524/12318 [4:21:43<16:55:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2524/12318 [4:21:43<16:55:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  20%|▏| 2525/12318 [4:21:52<16:55:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  20%|▏| 2525/12318 [4:21:52<16:55:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2526/12318 [4:21:58<16:55:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2526/12318 [4:21:58<16:55:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2527/12318 [4:22:02<16:55:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2527/12318 [4:22:02<16:55:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2528/12318 [4:22:34<16:56:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2528/12318 [4:22:34<16:56:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2529/12318 [4:22:42<16:56:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2529/12318 [4:22:42<16:56:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2530/12318 [4:22:51<16:56:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2530/12318 [4:22:51<16:56:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2531/12318 [4:22:58<16:56:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2531/12318 [4:22:58<16:56:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2532/12318 [4:23:05<16:56:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2532/12318 [4:23:05<16:56:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2533/12318 [4:23:09<16:56:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2533/12318 [4:23:09<16:56:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2534/12318 [4:23:17<16:56:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2534/12318 [4:23:17<16:56:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2535/12318 [4:23:24<16:56:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2535/12318 [4:23:24<16:56:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2536/12318 [4:23:29<16:56:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2536/12318 [4:23:29<16:56:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2537/12318 [4:23:32<16:56:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2537/12318 [4:23:32<16:56:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2538/12318 [4:23:40<16:56:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2538/12318 [4:23:40<16:56:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2539/12318 [4:23:41<16:55:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2539/12318 [4:23:41<16:55:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2540/12318 [4:23:50<16:55:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2540/12318 [4:23:50<16:55:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2541/12318 [4:23:53<16:55:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2541/12318 [4:23:53<16:55:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2542/12318 [4:23:59<16:55:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2542/12318 [4:23:59<16:55:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2543/12318 [4:24:02<16:54:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2543/12318 [4:24:02<16:54:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2544/12318 [4:24:11<16:55:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2544/12318 [4:24:11<16:55:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2545/12318 [4:24:19<16:55:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2545/12318 [4:24:19<16:55:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2546/12318 [4:24:22<16:54:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2546/12318 [4:24:22<16:54:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2547/12318 [4:24:28<16:54:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2547/12318 [4:24:28<16:54:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2548/12318 [4:24:32<16:54:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2548/12318 [4:24:32<16:54:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2549/12318 [4:24:37<16:54:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2549/12318 [4:24:37<16:54:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2550/12318 [4:24:42<16:53:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2550/12318 [4:24:42<16:53:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2551/12318 [4:24:49<16:53:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2551/12318 [4:24:49<16:53:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2552/12318 [4:24:54<16:53:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2552/12318 [4:24:54<16:53:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2553/12318 [4:24:55<16:53:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2553/12318 [4:24:55<16:53:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2554/12318 [4:24:58<16:53:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2554/12318 [4:24:58<16:53:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2555/12318 [4:25:05<16:52:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2555/12318 [4:25:05<16:52:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2556/12318 [4:25:10<16:52:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2556/12318 [4:25:10<16:52:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2557/12318 [4:25:14<16:52:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2557/12318 [4:25:14<16:52:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2558/12318 [4:25:22<16:52:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2558/12318 [4:25:22<16:52:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2559/12318 [4:25:29<16:52:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2559/12318 [4:25:29<16:52:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2560/12318 [4:25:44<16:52:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2560/12318 [4:25:44<16:52:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2561/12318 [4:25:50<16:52:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2561/12318 [4:25:50<16:52:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2562/12318 [4:25:54<16:52:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2562/12318 [4:25:54<16:52:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2563/12318 [4:25:57<16:52:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2563/12318 [4:25:57<16:52:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2564/12318 [4:26:02<16:52:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2564/12318 [4:26:02<16:52:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2565/12318 [4:26:08<16:51:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2565/12318 [4:26:08<16:51:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2566/12318 [4:26:15<16:51:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2566/12318 [4:26:15<16:51:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2567/12318 [4:26:22<16:51:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2567/12318 [4:26:22<16:51:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2568/12318 [4:26:27<16:51:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2568/12318 [4:26:27<16:51:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2569/12318 [4:26:33<16:51:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2569/12318 [4:26:33<16:51:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2570/12318 [4:26:38<16:51:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2570/12318 [4:26:38<16:51:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2571/12318 [4:26:46<16:51:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2571/12318 [4:26:46<16:51:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2572/12318 [4:26:54<16:51:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2572/12318 [4:26:54<16:51:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2573/12318 [4:26:57<16:51:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2573/12318 [4:26:57<16:51:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2574/12318 [4:27:00<16:50:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2574/12318 [4:27:00<16:50:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2575/12318 [4:27:07<16:50:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2575/12318 [4:27:07<16:50:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2576/12318 [4:27:13<16:50:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2576/12318 [4:27:13<16:50:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2577/12318 [4:27:15<16:50:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2577/12318 [4:27:15<16:50:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2578/12318 [4:27:17<16:49:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2578/12318 [4:27:17<16:49:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2579/12318 [4:27:23<16:49:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2579/12318 [4:27:23<16:49:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2580/12318 [4:27:30<16:49:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2580/12318 [4:27:30<16:49:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2581/12318 [4:27:32<16:49:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2581/12318 [4:27:32<16:49:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2582/12318 [4:27:36<16:49:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2582/12318 [4:27:36<16:49:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2583/12318 [4:27:40<16:48:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2583/12318 [4:27:40<16:48:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2584/12318 [4:27:47<16:48:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2584/12318 [4:27:47<16:48:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2585/12318 [4:27:50<16:48:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2585/12318 [4:27:50<16:48:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2586/12318 [4:27:54<16:48:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2586/12318 [4:27:54<16:48:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2587/12318 [4:27:59<16:48:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2587/12318 [4:27:59<16:48:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2588/12318 [4:28:03<16:47:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2588/12318 [4:28:03<16:47:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2589/12318 [4:28:09<16:47:42,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2589/12318 [4:28:09<16:47:42,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2590/12318 [4:28:10<16:47:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2590/12318 [4:28:10<16:47:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2591/12318 [4:28:12<16:46:52,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2591/12318 [4:28:12<16:46:52,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2592/12318 [4:29:06<16:49:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2592/12318 [4:29:06<16:49:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2593/12318 [4:29:13<16:49:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2593/12318 [4:29:13<16:49:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2594/12318 [4:29:18<16:49:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2594/12318 [4:29:18<16:49:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2595/12318 [4:29:23<16:49:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2595/12318 [4:29:23<16:49:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2596/12318 [4:29:27<16:49:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2596/12318 [4:29:27<16:49:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2597/12318 [4:29:33<16:48:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2597/12318 [4:29:33<16:48:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2598/12318 [4:29:41<16:49:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2598/12318 [4:29:41<16:49:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2599/12318 [4:29:50<16:49:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2599/12318 [4:29:50<16:49:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2600/12318 [4:29:58<16:49:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2600/12318 [4:29:58<16:49:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2601/12318 [4:30:03<16:48:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2601/12318 [4:30:03<16:48:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2602/12318 [4:30:11<16:48:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2602/12318 [4:30:11<16:48:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2603/12318 [4:30:12<16:48:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2603/12318 [4:30:12<16:48:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2604/12318 [4:30:14<16:48:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2604/12318 [4:30:14<16:48:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2605/12318 [4:30:16<16:47:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2605/12318 [4:30:16<16:47:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2606/12318 [4:30:20<16:47:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2606/12318 [4:30:20<16:47:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2607/12318 [4:30:26<16:47:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2607/12318 [4:30:26<16:47:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2608/12318 [4:30:34<16:47:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2608/12318 [4:30:34<16:47:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2609/12318 [4:30:42<16:47:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2609/12318 [4:30:42<16:47:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2610/12318 [4:30:50<16:47:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2610/12318 [4:30:50<16:47:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2611/12318 [4:30:57<16:47:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2611/12318 [4:30:57<16:47:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2612/12318 [4:31:01<16:47:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2612/12318 [4:31:01<16:47:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2613/12318 [4:31:04<16:46:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2613/12318 [4:31:04<16:46:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2614/12318 [4:31:11<16:46:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2614/12318 [4:31:11<16:46:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2615/12318 [4:31:15<16:46:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2615/12318 [4:31:15<16:46:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2616/12318 [4:31:20<16:46:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2616/12318 [4:31:20<16:46:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2617/12318 [4:31:21<16:45:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2617/12318 [4:31:21<16:45:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2618/12318 [4:31:28<16:45:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2618/12318 [4:31:28<16:45:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2619/12318 [4:31:33<16:45:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2619/12318 [4:31:33<16:45:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2620/12318 [4:31:35<16:45:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2620/12318 [4:31:35<16:45:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2621/12318 [4:31:36<16:44:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2621/12318 [4:31:36<16:44:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2622/12318 [4:31:41<16:44:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2622/12318 [4:31:41<16:44:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2623/12318 [4:31:47<16:44:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2623/12318 [4:31:47<16:44:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2624/12318 [4:32:45<16:47:39,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2624/12318 [4:32:45<16:47:39,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2625/12318 [4:32:50<16:47:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2625/12318 [4:32:50<16:47:31,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2626/12318 [4:32:54<16:47:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2626/12318 [4:32:54<16:47:14,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2627/12318 [4:33:01<16:47:09,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2627/12318 [4:33:01<16:47:09,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2628/12318 [4:33:04<16:46:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2628/12318 [4:33:04<16:46:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2629/12318 [4:33:11<16:46:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2629/12318 [4:33:11<16:46:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2630/12318 [4:33:20<16:46:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2630/12318 [4:33:20<16:46:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2631/12318 [4:33:25<16:46:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2631/12318 [4:33:25<16:46:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2632/12318 [4:33:27<16:46:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2632/12318 [4:33:27<16:46:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2633/12318 [4:33:34<16:46:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2633/12318 [4:33:34<16:46:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2634/12318 [4:33:43<16:46:22,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2634/12318 [4:33:43<16:46:22,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2635/12318 [4:33:48<16:46:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2635/12318 [4:33:48<16:46:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2636/12318 [4:33:56<16:46:11,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2636/12318 [4:33:56<16:46:11,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2637/12318 [4:34:03<16:46:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2637/12318 [4:34:03<16:46:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2638/12318 [4:34:07<16:45:54,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2638/12318 [4:34:07<16:45:54,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2639/12318 [4:34:14<16:45:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2639/12318 [4:34:14<16:45:50,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2640/12318 [4:34:23<16:45:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2640/12318 [4:34:23<16:45:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2641/12318 [4:34:24<16:45:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2641/12318 [4:34:24<16:45:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2642/12318 [4:34:28<16:45:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2642/12318 [4:34:28<16:45:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2643/12318 [4:34:33<16:45:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2643/12318 [4:34:33<16:45:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2644/12318 [4:34:38<16:44:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2644/12318 [4:34:38<16:44:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2645/12318 [4:34:42<16:44:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2645/12318 [4:34:42<16:44:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2646/12318 [4:34:44<16:44:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2646/12318 [4:34:44<16:44:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2647/12318 [4:34:53<16:44:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2647/12318 [4:34:53<16:44:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  21%|▏| 2648/12318 [4:34:58<16:44:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  21%|▏| 2648/12318 [4:34:58<16:44:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2649/12318 [4:35:05<16:44:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2649/12318 [4:35:05<16:44:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2650/12318 [4:35:10<16:43:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2650/12318 [4:35:10<16:43:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2651/12318 [4:35:18<16:43:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2651/12318 [4:35:18<16:43:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2652/12318 [4:35:20<16:43:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2652/12318 [4:35:20<16:43:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2653/12318 [4:35:29<16:43:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2653/12318 [4:35:29<16:43:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2654/12318 [4:35:36<16:43:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2654/12318 [4:35:36<16:43:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2655/12318 [4:35:42<16:43:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2655/12318 [4:35:42<16:43:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2656/12318 [4:35:59<16:44:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2656/12318 [4:35:59<16:44:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2657/12318 [4:36:02<16:43:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2657/12318 [4:36:02<16:43:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2658/12318 [4:36:11<16:43:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2658/12318 [4:36:11<16:43:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2659/12318 [4:36:15<16:43:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2659/12318 [4:36:15<16:43:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2660/12318 [4:36:17<16:43:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2660/12318 [4:36:17<16:43:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2661/12318 [4:36:22<16:42:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2661/12318 [4:36:22<16:42:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2662/12318 [4:36:27<16:42:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2662/12318 [4:36:27<16:42:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2663/12318 [4:36:35<16:42:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2663/12318 [4:36:35<16:42:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2664/12318 [4:36:37<16:42:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2664/12318 [4:36:37<16:42:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2665/12318 [4:36:41<16:42:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2665/12318 [4:36:41<16:42:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2666/12318 [4:36:46<16:42:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2666/12318 [4:36:46<16:42:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2667/12318 [4:36:54<16:42:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2667/12318 [4:36:54<16:42:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2668/12318 [4:37:03<16:42:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2668/12318 [4:37:03<16:42:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2669/12318 [4:37:06<16:41:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2669/12318 [4:37:06<16:41:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2670/12318 [4:37:09<16:41:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2670/12318 [4:37:09<16:41:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2671/12318 [4:37:16<16:41:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2671/12318 [4:37:16<16:41:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2672/12318 [4:37:21<16:41:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2672/12318 [4:37:21<16:41:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2673/12318 [4:37:27<16:41:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2673/12318 [4:37:27<16:41:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2674/12318 [4:37:32<16:40:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2674/12318 [4:37:32<16:40:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2675/12318 [4:37:33<16:40:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2675/12318 [4:37:33<16:40:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2676/12318 [4:37:37<16:40:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2676/12318 [4:37:37<16:40:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2677/12318 [4:37:42<16:40:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2677/12318 [4:37:42<16:40:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2678/12318 [4:37:51<16:40:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2678/12318 [4:37:51<16:40:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2679/12318 [4:37:57<16:40:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2679/12318 [4:37:57<16:40:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2680/12318 [4:38:01<16:39:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2680/12318 [4:38:01<16:39:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2681/12318 [4:38:07<16:39:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2681/12318 [4:38:07<16:39:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2682/12318 [4:38:11<16:39:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2682/12318 [4:38:11<16:39:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2683/12318 [4:38:16<16:39:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2683/12318 [4:38:16<16:39:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2684/12318 [4:38:25<16:39:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2684/12318 [4:38:25<16:39:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2685/12318 [4:38:32<16:39:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2685/12318 [4:38:32<16:39:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2686/12318 [4:38:37<16:39:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2686/12318 [4:38:37<16:39:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2687/12318 [4:38:42<16:38:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2687/12318 [4:38:42<16:38:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2688/12318 [4:39:09<16:40:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2688/12318 [4:39:09<16:40:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2689/12318 [4:39:13<16:39:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2689/12318 [4:39:13<16:39:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2690/12318 [4:39:18<16:39:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2690/12318 [4:39:18<16:39:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2691/12318 [4:39:26<16:39:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2691/12318 [4:39:26<16:39:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2692/12318 [4:39:31<16:39:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2692/12318 [4:39:31<16:39:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2693/12318 [4:39:37<16:39:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2693/12318 [4:39:37<16:39:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2694/12318 [4:39:41<16:39:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2694/12318 [4:39:41<16:39:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2695/12318 [4:39:46<16:39:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2695/12318 [4:39:46<16:39:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2696/12318 [4:39:54<16:38:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2696/12318 [4:39:54<16:38:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2697/12318 [4:40:03<16:39:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2697/12318 [4:40:03<16:39:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2698/12318 [4:40:05<16:38:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2698/12318 [4:40:05<16:38:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2699/12318 [4:40:12<16:38:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2699/12318 [4:40:12<16:38:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2700/12318 [4:40:13<16:38:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2700/12318 [4:40:13<16:38:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2701/12318 [4:40:17<16:38:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2701/12318 [4:40:17<16:38:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2702/12318 [4:40:21<16:37:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2702/12318 [4:40:21<16:37:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2703/12318 [4:40:26<16:37:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2703/12318 [4:40:26<16:37:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2704/12318 [4:40:31<16:37:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2704/12318 [4:40:31<16:37:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2705/12318 [4:40:36<16:37:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2705/12318 [4:40:36<16:37:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2706/12318 [4:40:44<16:37:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2706/12318 [4:40:44<16:37:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2707/12318 [4:40:48<16:36:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2707/12318 [4:40:48<16:36:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2708/12318 [4:40:53<16:36:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2708/12318 [4:40:53<16:36:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2709/12318 [4:41:00<16:36:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2709/12318 [4:41:00<16:36:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2710/12318 [4:41:01<16:36:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2710/12318 [4:41:01<16:36:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2711/12318 [4:41:06<16:36:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2711/12318 [4:41:06<16:36:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2712/12318 [4:41:09<16:35:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2712/12318 [4:41:09<16:35:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2713/12318 [4:41:18<16:35:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2713/12318 [4:41:18<16:35:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2714/12318 [4:41:25<16:35:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2714/12318 [4:41:25<16:35:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2715/12318 [4:41:30<16:35:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2715/12318 [4:41:30<16:35:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2716/12318 [4:41:35<16:35:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2716/12318 [4:41:35<16:35:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2717/12318 [4:41:42<16:35:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2717/12318 [4:41:42<16:35:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2718/12318 [4:41:48<16:35:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2718/12318 [4:41:48<16:35:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2719/12318 [4:41:54<16:35:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2719/12318 [4:41:54<16:35:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2720/12318 [4:42:21<16:36:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2720/12318 [4:42:21<16:36:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2721/12318 [4:42:29<16:36:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2721/12318 [4:42:29<16:36:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2722/12318 [4:42:37<16:36:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2722/12318 [4:42:37<16:36:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2723/12318 [4:42:40<16:36:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2723/12318 [4:42:40<16:36:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2724/12318 [4:42:45<16:35:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2724/12318 [4:42:45<16:35:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2725/12318 [4:42:47<16:35:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2725/12318 [4:42:47<16:35:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2726/12318 [4:42:52<16:35:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2726/12318 [4:42:52<16:35:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2727/12318 [4:42:56<16:35:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2727/12318 [4:42:56<16:35:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2728/12318 [4:43:03<16:35:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2728/12318 [4:43:03<16:35:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2729/12318 [4:43:11<16:35:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2729/12318 [4:43:11<16:35:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2730/12318 [4:43:15<16:34:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2730/12318 [4:43:15<16:34:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2731/12318 [4:43:21<16:34:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2731/12318 [4:43:21<16:34:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2732/12318 [4:43:25<16:34:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2732/12318 [4:43:25<16:34:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2733/12318 [4:43:33<16:34:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2733/12318 [4:43:33<16:34:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2734/12318 [4:43:39<16:34:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2734/12318 [4:43:39<16:34:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2735/12318 [4:43:45<16:34:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2735/12318 [4:43:45<16:34:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2736/12318 [4:43:48<16:33:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2736/12318 [4:43:48<16:33:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2737/12318 [4:43:56<16:33:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2737/12318 [4:43:56<16:33:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2738/12318 [4:44:05<16:33:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2738/12318 [4:44:05<16:33:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2739/12318 [4:44:09<16:33:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2739/12318 [4:44:09<16:33:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2740/12318 [4:44:18<16:33:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2740/12318 [4:44:18<16:33:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2741/12318 [4:44:23<16:33:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2741/12318 [4:44:23<16:33:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2742/12318 [4:44:27<16:33:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2742/12318 [4:44:27<16:33:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2743/12318 [4:44:30<16:33:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2743/12318 [4:44:30<16:33:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2744/12318 [4:44:32<16:32:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2744/12318 [4:44:32<16:32:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2745/12318 [4:44:40<16:32:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2745/12318 [4:44:40<16:32:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2746/12318 [4:44:47<16:32:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2746/12318 [4:44:47<16:32:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2747/12318 [4:44:51<16:32:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2747/12318 [4:44:51<16:32:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2748/12318 [4:44:56<16:32:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2748/12318 [4:44:56<16:32:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2749/12318 [4:45:01<16:32:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2749/12318 [4:45:01<16:32:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2750/12318 [4:45:04<16:31:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2750/12318 [4:45:04<16:31:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2751/12318 [4:45:09<16:31:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2751/12318 [4:45:09<16:31:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2752/12318 [4:46:01<16:34:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2752/12318 [4:46:01<16:34:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2753/12318 [4:46:07<16:34:06,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2753/12318 [4:46:07<16:34:06,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2754/12318 [4:46:12<16:33:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2754/12318 [4:46:12<16:33:56,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2755/12318 [4:46:20<16:33:55,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2755/12318 [4:46:20<16:33:55,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2756/12318 [4:46:29<16:33:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2756/12318 [4:46:29<16:33:57,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2757/12318 [4:46:34<16:33:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2757/12318 [4:46:34<16:33:47,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2758/12318 [4:46:42<16:33:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2758/12318 [4:46:42<16:33:50,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2759/12318 [4:46:50<16:33:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2759/12318 [4:46:50<16:33:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2760/12318 [4:46:55<16:33:36,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2760/12318 [4:46:55<16:33:36,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2761/12318 [4:47:01<16:33:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2761/12318 [4:47:01<16:33:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2762/12318 [4:47:09<16:33:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2762/12318 [4:47:09<16:33:30,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2763/12318 [4:47:16<16:33:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2763/12318 [4:47:16<16:33:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2764/12318 [4:47:19<16:33:11,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2764/12318 [4:47:19<16:33:11,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2765/12318 [4:47:26<16:33:06,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2765/12318 [4:47:26<16:33:06,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2766/12318 [4:47:27<16:32:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2766/12318 [4:47:27<16:32:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2767/12318 [4:47:29<16:32:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2767/12318 [4:47:29<16:32:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2768/12318 [4:47:34<16:32:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2768/12318 [4:47:34<16:32:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2769/12318 [4:47:39<16:32:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2769/12318 [4:47:39<16:32:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2770/12318 [4:47:44<16:31:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2770/12318 [4:47:44<16:31:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  22%|▏| 2771/12318 [4:47:48<16:31:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  22%|▏| 2771/12318 [4:47:48<16:31:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2772/12318 [4:47:54<16:31:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2772/12318 [4:47:54<16:31:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2773/12318 [4:47:58<16:31:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2773/12318 [4:47:58<16:31:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2774/12318 [4:48:06<16:31:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2774/12318 [4:48:06<16:31:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2775/12318 [4:48:15<16:31:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2775/12318 [4:48:15<16:31:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2776/12318 [4:48:23<16:31:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2776/12318 [4:48:23<16:31:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2777/12318 [4:48:31<16:31:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2777/12318 [4:48:31<16:31:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2778/12318 [4:48:34<16:30:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2778/12318 [4:48:34<16:30:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2779/12318 [4:48:36<16:30:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2779/12318 [4:48:36<16:30:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2780/12318 [4:48:43<16:30:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2780/12318 [4:48:43<16:30:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2781/12318 [4:48:50<16:30:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2781/12318 [4:48:50<16:30:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2782/12318 [4:48:55<16:30:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2782/12318 [4:48:55<16:30:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2783/12318 [4:49:01<16:30:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2783/12318 [4:49:01<16:30:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2784/12318 [4:49:21<16:30:55,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2784/12318 [4:49:21<16:30:55,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2785/12318 [4:49:24<16:30:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2785/12318 [4:49:24<16:30:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2786/12318 [4:49:28<16:30:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2786/12318 [4:49:28<16:30:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2787/12318 [4:49:37<16:30:28,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2787/12318 [4:49:37<16:30:28,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2788/12318 [4:49:41<16:30:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2788/12318 [4:49:41<16:30:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2789/12318 [4:49:49<16:30:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2789/12318 [4:49:49<16:30:14,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2790/12318 [4:49:52<16:29:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2790/12318 [4:49:52<16:29:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2791/12318 [4:49:56<16:29:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2791/12318 [4:49:56<16:29:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2792/12318 [4:50:05<16:29:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2792/12318 [4:50:05<16:29:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2793/12318 [4:50:06<16:29:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2793/12318 [4:50:06<16:29:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2794/12318 [4:50:14<16:29:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2794/12318 [4:50:14<16:29:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2795/12318 [4:50:23<16:29:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2795/12318 [4:50:23<16:29:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2796/12318 [4:50:28<16:29:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2796/12318 [4:50:28<16:29:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2797/12318 [4:50:32<16:29:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2797/12318 [4:50:32<16:29:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2798/12318 [4:50:41<16:29:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2798/12318 [4:50:41<16:29:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2799/12318 [4:50:48<16:28:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2799/12318 [4:50:48<16:28:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2800/12318 [4:50:53<16:28:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2800/12318 [4:50:53<16:28:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2801/12318 [4:50:57<16:28:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2801/12318 [4:50:57<16:28:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2802/12318 [4:51:02<16:28:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2802/12318 [4:51:02<16:28:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2803/12318 [4:51:06<16:28:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2803/12318 [4:51:06<16:28:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2804/12318 [4:51:09<16:27:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2804/12318 [4:51:09<16:27:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2805/12318 [4:51:11<16:27:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2805/12318 [4:51:11<16:27:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2806/12318 [4:51:20<16:27:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2806/12318 [4:51:20<16:27:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2807/12318 [4:51:21<16:27:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2807/12318 [4:51:21<16:27:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2808/12318 [4:51:23<16:26:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2808/12318 [4:51:23<16:26:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2809/12318 [4:51:24<16:26:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2809/12318 [4:51:24<16:26:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2810/12318 [4:51:32<16:26:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2810/12318 [4:51:32<16:26:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2811/12318 [4:51:41<16:26:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2811/12318 [4:51:41<16:26:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2812/12318 [4:51:45<16:26:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2812/12318 [4:51:45<16:26:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2813/12318 [4:51:48<16:26:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2813/12318 [4:51:48<16:26:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2814/12318 [4:51:57<16:26:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2814/12318 [4:51:57<16:26:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2815/12318 [4:52:02<16:25:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2815/12318 [4:52:02<16:25:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2816/12318 [4:52:42<16:27:41,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2816/12318 [4:52:42<16:27:41,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2817/12318 [4:52:51<16:27:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2817/12318 [4:52:51<16:27:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2818/12318 [4:52:53<16:27:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2818/12318 [4:52:53<16:27:24,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2819/12318 [4:52:59<16:27:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2819/12318 [4:52:59<16:27:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2820/12318 [4:53:07<16:27:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2820/12318 [4:53:07<16:27:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2821/12318 [4:53:10<16:26:58,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2821/12318 [4:53:10<16:26:58,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2822/12318 [4:53:11<16:26:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2822/12318 [4:53:11<16:26:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2823/12318 [4:53:17<16:26:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2823/12318 [4:53:17<16:26:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2824/12318 [4:53:21<16:26:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2824/12318 [4:53:21<16:26:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2825/12318 [4:53:27<16:26:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2825/12318 [4:53:27<16:26:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2826/12318 [4:53:35<16:26:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2826/12318 [4:53:35<16:26:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2827/12318 [4:53:38<16:25:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2827/12318 [4:53:38<16:25:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2828/12318 [4:53:40<16:25:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2828/12318 [4:53:40<16:25:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2829/12318 [4:53:46<16:25:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2829/12318 [4:53:46<16:25:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2830/12318 [4:53:47<16:24:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2830/12318 [4:53:47<16:24:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2831/12318 [4:53:49<16:24:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2831/12318 [4:53:49<16:24:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2832/12318 [4:53:51<16:24:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2832/12318 [4:53:51<16:24:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2833/12318 [4:54:00<16:24:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2833/12318 [4:54:00<16:24:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2834/12318 [4:54:06<16:24:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2834/12318 [4:54:06<16:24:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2835/12318 [4:54:11<16:24:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2835/12318 [4:54:11<16:24:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2836/12318 [4:54:20<16:24:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2836/12318 [4:54:20<16:24:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2837/12318 [4:54:28<16:24:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2837/12318 [4:54:28<16:24:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2838/12318 [4:54:29<16:23:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2838/12318 [4:54:29<16:23:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2839/12318 [4:54:34<16:23:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2839/12318 [4:54:34<16:23:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2840/12318 [4:54:38<16:23:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2840/12318 [4:54:38<16:23:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2841/12318 [4:54:43<16:23:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2841/12318 [4:54:43<16:23:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2842/12318 [4:54:46<16:22:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2842/12318 [4:54:46<16:22:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2843/12318 [4:54:48<16:22:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2843/12318 [4:54:48<16:22:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2844/12318 [4:54:53<16:22:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2844/12318 [4:54:53<16:22:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2845/12318 [4:55:02<16:22:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2845/12318 [4:55:02<16:22:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2846/12318 [4:55:10<16:22:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2846/12318 [4:55:10<16:22:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2847/12318 [4:55:15<16:22:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2847/12318 [4:55:15<16:22:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2848/12318 [4:56:12<16:24:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2848/12318 [4:56:12<16:24:56,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2849/12318 [4:56:17<16:24:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2849/12318 [4:56:17<16:24:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2850/12318 [4:56:23<16:24:39,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2850/12318 [4:56:23<16:24:39,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2851/12318 [4:56:28<16:24:29,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2851/12318 [4:56:28<16:24:29,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2852/12318 [4:56:32<16:24:13,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2852/12318 [4:56:32<16:24:13,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2853/12318 [4:56:36<16:24:01,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2853/12318 [4:56:36<16:24:01,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2854/12318 [4:56:38<16:23:41,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2854/12318 [4:56:38<16:23:41,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2855/12318 [4:56:45<16:23:36,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2855/12318 [4:56:45<16:23:36,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2856/12318 [4:56:47<16:23:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2856/12318 [4:56:47<16:23:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2857/12318 [4:56:52<16:23:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2857/12318 [4:56:52<16:23:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2858/12318 [4:56:54<16:22:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2858/12318 [4:56:54<16:22:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2859/12318 [4:57:01<16:22:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2859/12318 [4:57:01<16:22:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2860/12318 [4:57:02<16:22:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2860/12318 [4:57:02<16:22:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2861/12318 [4:57:07<16:22:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2861/12318 [4:57:07<16:22:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2862/12318 [4:57:16<16:22:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2862/12318 [4:57:16<16:22:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2863/12318 [4:57:18<16:21:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2863/12318 [4:57:18<16:21:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2864/12318 [4:57:20<16:21:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2864/12318 [4:57:20<16:21:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2865/12318 [4:57:23<16:21:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2865/12318 [4:57:23<16:21:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2866/12318 [4:57:27<16:20:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2866/12318 [4:57:27<16:20:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2867/12318 [4:57:31<16:20:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2867/12318 [4:57:31<16:20:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2868/12318 [4:57:38<16:20:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2868/12318 [4:57:38<16:20:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2869/12318 [4:57:41<16:20:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2869/12318 [4:57:41<16:20:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2870/12318 [4:57:49<16:20:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2870/12318 [4:57:49<16:20:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2871/12318 [4:57:52<16:20:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2871/12318 [4:57:52<16:20:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2872/12318 [4:57:57<16:19:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2872/12318 [4:57:57<16:19:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2873/12318 [4:58:01<16:19:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2873/12318 [4:58:01<16:19:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2874/12318 [4:58:09<16:19:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2874/12318 [4:58:09<16:19:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2875/12318 [4:58:15<16:19:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2875/12318 [4:58:15<16:19:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2876/12318 [4:58:19<16:19:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2876/12318 [4:58:19<16:19:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2877/12318 [4:58:21<16:19:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2877/12318 [4:58:21<16:19:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2878/12318 [4:58:26<16:18:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2878/12318 [4:58:26<16:18:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2879/12318 [4:58:30<16:18:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2879/12318 [4:58:30<16:18:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2880/12318 [4:59:30<16:21:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2880/12318 [4:59:30<16:21:30,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2881/12318 [4:59:34<16:21:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2881/12318 [4:59:34<16:21:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2882/12318 [4:59:43<16:21:18,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2882/12318 [4:59:43<16:21:18,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2883/12318 [4:59:48<16:21:10,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2883/12318 [4:59:48<16:21:10,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2884/12318 [4:59:56<16:21:08,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2884/12318 [4:59:56<16:21:08,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2885/12318 [5:00:02<16:21:03,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2885/12318 [5:00:02<16:21:03,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2886/12318 [5:00:08<16:20:55,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2886/12318 [5:00:08<16:20:55,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2887/12318 [5:00:11<16:20:40,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2887/12318 [5:00:11<16:20:40,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2888/12318 [5:00:13<16:20:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2888/12318 [5:00:13<16:20:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2889/12318 [5:00:21<16:20:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2889/12318 [5:00:21<16:20:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2890/12318 [5:00:25<16:20:04,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2890/12318 [5:00:25<16:20:04,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2891/12318 [5:00:34<16:20:05,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2891/12318 [5:00:34<16:20:05,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2892/12318 [5:00:39<16:19:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2892/12318 [5:00:39<16:19:57,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2893/12318 [5:00:48<16:19:58,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2893/12318 [5:00:48<16:19:58,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  23%|▏| 2894/12318 [5:00:50<16:19:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  23%|▏| 2894/12318 [5:00:50<16:19:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2895/12318 [5:00:53<16:19:21,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2895/12318 [5:00:53<16:19:21,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2896/12318 [5:00:57<16:19:09,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2896/12318 [5:00:57<16:19:09,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2897/12318 [5:01:05<16:19:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2897/12318 [5:01:05<16:19:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2898/12318 [5:01:12<16:19:05,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2898/12318 [5:01:12<16:19:05,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2899/12318 [5:01:16<16:18:51,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2899/12318 [5:01:16<16:18:51,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2900/12318 [5:01:24<16:18:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2900/12318 [5:01:24<16:18:50,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2901/12318 [5:01:28<16:18:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2901/12318 [5:01:28<16:18:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2902/12318 [5:01:31<16:18:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2902/12318 [5:01:31<16:18:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2903/12318 [5:01:33<16:18:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2903/12318 [5:01:33<16:18:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2904/12318 [5:01:42<16:18:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2904/12318 [5:01:42<16:18:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2905/12318 [5:01:43<16:17:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2905/12318 [5:01:43<16:17:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2906/12318 [5:01:46<16:17:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2906/12318 [5:01:46<16:17:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2907/12318 [5:01:49<16:17:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2907/12318 [5:01:49<16:17:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2908/12318 [5:01:57<16:17:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2908/12318 [5:01:57<16:17:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2909/12318 [5:02:00<16:16:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2909/12318 [5:02:00<16:16:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2910/12318 [5:02:04<16:16:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2910/12318 [5:02:04<16:16:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2911/12318 [5:02:08<16:16:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2911/12318 [5:02:08<16:16:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2912/12318 [5:02:45<16:17:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2912/12318 [5:02:45<16:17:57,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2913/12318 [5:02:53<16:17:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2913/12318 [5:02:53<16:17:56,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2914/12318 [5:02:59<16:17:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2914/12318 [5:02:59<16:17:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2915/12318 [5:03:00<16:17:26,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2915/12318 [5:03:00<16:17:26,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2916/12318 [5:03:09<16:17:27,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2916/12318 [5:03:09<16:17:27,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2917/12318 [5:03:11<16:17:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2917/12318 [5:03:11<16:17:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2918/12318 [5:03:15<16:16:54,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2918/12318 [5:03:15<16:16:54,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2919/12318 [5:03:24<16:16:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2919/12318 [5:03:24<16:16:56,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2920/12318 [5:03:28<16:16:42,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2920/12318 [5:03:28<16:16:42,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2921/12318 [5:03:31<16:16:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2921/12318 [5:03:31<16:16:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2922/12318 [5:03:38<16:16:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2922/12318 [5:03:38<16:16:24,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2923/12318 [5:03:46<16:16:22,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2923/12318 [5:03:46<16:16:22,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2924/12318 [5:03:53<16:16:18,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2924/12318 [5:03:53<16:16:18,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2925/12318 [5:03:58<16:16:10,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2925/12318 [5:03:58<16:16:10,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2926/12318 [5:04:00<16:15:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2926/12318 [5:04:00<16:15:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2927/12318 [5:04:09<16:15:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2927/12318 [5:04:09<16:15:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2928/12318 [5:04:14<16:15:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2928/12318 [5:04:14<16:15:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2929/12318 [5:04:18<16:15:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2929/12318 [5:04:18<16:15:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2930/12318 [5:04:22<16:15:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2930/12318 [5:04:22<16:15:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2931/12318 [5:04:31<16:15:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2931/12318 [5:04:31<16:15:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2932/12318 [5:04:35<16:15:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2932/12318 [5:04:35<16:15:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2933/12318 [5:04:43<16:15:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2933/12318 [5:04:43<16:15:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2934/12318 [5:04:51<16:15:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2934/12318 [5:04:51<16:15:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2935/12318 [5:04:55<16:14:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2935/12318 [5:04:55<16:14:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2936/12318 [5:05:00<16:14:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2936/12318 [5:05:00<16:14:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2937/12318 [5:05:04<16:14:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2937/12318 [5:05:04<16:14:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2938/12318 [5:05:06<16:14:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2938/12318 [5:05:06<16:14:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2939/12318 [5:05:09<16:13:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2939/12318 [5:05:09<16:13:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2940/12318 [5:05:11<16:13:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2940/12318 [5:05:11<16:13:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2941/12318 [5:05:19<16:13:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2941/12318 [5:05:19<16:13:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2942/12318 [5:05:25<16:13:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2942/12318 [5:05:25<16:13:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2943/12318 [5:05:31<16:13:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2943/12318 [5:05:31<16:13:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2944/12318 [5:06:01<16:14:26,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2944/12318 [5:06:01<16:14:26,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2945/12318 [5:06:09<16:14:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2945/12318 [5:06:09<16:14:24,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2946/12318 [5:06:12<16:14:09,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2946/12318 [5:06:12<16:14:09,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2947/12318 [5:06:16<16:13:54,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2947/12318 [5:06:16<16:13:54,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2948/12318 [5:06:21<16:13:45,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2948/12318 [5:06:21<16:13:45,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2949/12318 [5:06:24<16:13:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2949/12318 [5:06:24<16:13:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2950/12318 [5:06:29<16:13:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2950/12318 [5:06:29<16:13:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2951/12318 [5:06:36<16:13:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2951/12318 [5:06:36<16:13:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2952/12318 [5:06:43<16:13:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2952/12318 [5:06:43<16:13:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2953/12318 [5:06:49<16:13:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2953/12318 [5:06:49<16:13:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2954/12318 [5:06:55<16:12:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2954/12318 [5:06:55<16:12:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2955/12318 [5:07:00<16:12:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2955/12318 [5:07:00<16:12:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2956/12318 [5:07:03<16:12:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2956/12318 [5:07:03<16:12:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2957/12318 [5:07:10<16:12:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2957/12318 [5:07:10<16:12:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2958/12318 [5:07:13<16:12:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2958/12318 [5:07:13<16:12:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2959/12318 [5:07:22<16:12:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2959/12318 [5:07:22<16:12:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2960/12318 [5:07:26<16:11:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2960/12318 [5:07:26<16:11:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2961/12318 [5:07:33<16:11:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2961/12318 [5:07:33<16:11:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2962/12318 [5:07:39<16:11:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2962/12318 [5:07:39<16:11:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2963/12318 [5:07:44<16:11:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2963/12318 [5:07:44<16:11:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2964/12318 [5:07:46<16:11:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2964/12318 [5:07:46<16:11:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2965/12318 [5:07:49<16:11:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2965/12318 [5:07:49<16:11:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2966/12318 [5:07:52<16:10:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2966/12318 [5:07:52<16:10:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2967/12318 [5:07:57<16:10:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2967/12318 [5:07:57<16:10:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2968/12318 [5:08:03<16:10:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2968/12318 [5:08:03<16:10:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2969/12318 [5:08:07<16:10:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2969/12318 [5:08:07<16:10:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2970/12318 [5:08:16<16:10:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2970/12318 [5:08:16<16:10:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2971/12318 [5:08:21<16:10:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2971/12318 [5:08:21<16:10:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2972/12318 [5:08:25<16:09:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2972/12318 [5:08:25<16:09:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2973/12318 [5:08:27<16:09:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2973/12318 [5:08:27<16:09:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2974/12318 [5:08:33<16:09:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2974/12318 [5:08:33<16:09:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2975/12318 [5:08:42<16:09:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2975/12318 [5:08:42<16:09:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2976/12318 [5:09:23<16:11:13,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2976/12318 [5:09:23<16:11:13,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2977/12318 [5:09:29<16:11:05,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2977/12318 [5:09:29<16:11:05,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2978/12318 [5:09:36<16:11:03,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2978/12318 [5:09:36<16:11:03,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2979/12318 [5:09:43<16:10:58,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2979/12318 [5:09:43<16:10:58,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2980/12318 [5:09:51<16:10:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2980/12318 [5:09:51<16:10:56,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2981/12318 [5:09:57<16:10:51,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2981/12318 [5:09:57<16:10:51,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2982/12318 [5:10:04<16:10:46,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2982/12318 [5:10:04<16:10:46,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2983/12318 [5:10:12<16:10:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2983/12318 [5:10:12<16:10:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2984/12318 [5:10:14<16:10:26,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2984/12318 [5:10:14<16:10:26,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2985/12318 [5:10:18<16:10:12,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2985/12318 [5:10:18<16:10:12,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2986/12318 [5:10:24<16:10:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2986/12318 [5:10:24<16:10:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2987/12318 [5:10:28<16:09:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2987/12318 [5:10:28<16:09:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2988/12318 [5:10:33<16:09:42,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2988/12318 [5:10:33<16:09:42,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2989/12318 [5:10:40<16:09:37,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2989/12318 [5:10:40<16:09:37,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2990/12318 [5:10:45<16:09:27,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2990/12318 [5:10:45<16:09:27,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2991/12318 [5:10:49<16:09:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2991/12318 [5:10:49<16:09:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2992/12318 [5:10:53<16:09:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2992/12318 [5:10:53<16:09:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2993/12318 [5:11:00<16:08:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2993/12318 [5:11:00<16:08:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2994/12318 [5:11:05<16:08:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2994/12318 [5:11:05<16:08:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2995/12318 [5:11:14<16:08:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2995/12318 [5:11:14<16:08:50,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2996/12318 [5:11:21<16:08:45,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2996/12318 [5:11:21<16:08:45,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2997/12318 [5:11:29<16:08:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2997/12318 [5:11:29<16:08:47,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2998/12318 [5:11:32<16:08:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2998/12318 [5:11:32<16:08:31,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 2999/12318 [5:11:38<16:08:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 2999/12318 [5:11:38<16:08:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3000/12318 [5:11:44<16:08:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3000/12318 [5:11:44<16:08:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3001/12318 [5:11:47<16:08:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3001/12318 [5:11:47<16:08:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3002/12318 [5:11:49<16:07:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3002/12318 [5:11:49<16:07:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3003/12318 [5:11:57<16:07:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3003/12318 [5:11:57<16:07:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3004/12318 [5:12:03<16:07:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3004/12318 [5:12:03<16:07:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3005/12318 [5:12:10<16:07:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3005/12318 [5:12:10<16:07:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3006/12318 [5:12:16<16:07:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3006/12318 [5:12:16<16:07:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3007/12318 [5:12:20<16:07:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3007/12318 [5:12:20<16:07:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3008/12318 [5:12:44<16:07:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3008/12318 [5:12:44<16:07:57,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3009/12318 [5:12:49<16:07:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3009/12318 [5:12:49<16:07:47,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3010/12318 [5:12:55<16:07:39,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3010/12318 [5:12:55<16:07:39,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3011/12318 [5:13:03<16:07:41,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3011/12318 [5:13:03<16:07:41,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3012/12318 [5:13:05<16:07:19,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3012/12318 [5:13:05<16:07:19,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3013/12318 [5:13:10<16:07:11,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3013/12318 [5:13:10<16:07:11,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3014/12318 [5:13:15<16:06:59,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3014/12318 [5:13:15<16:06:59,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3015/12318 [5:13:20<16:06:51,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3015/12318 [5:13:20<16:06:51,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3016/12318 [5:13:22<16:06:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3016/12318 [5:13:22<16:06:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  24%|▏| 3017/12318 [5:13:25<16:06:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  24%|▏| 3017/12318 [5:13:25<16:06:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3018/12318 [5:13:31<16:06:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3018/12318 [5:13:31<16:06:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3019/12318 [5:13:36<16:05:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3019/12318 [5:13:36<16:05:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3020/12318 [5:13:38<16:05:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3020/12318 [5:13:38<16:05:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3021/12318 [5:13:41<16:05:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3021/12318 [5:13:41<16:05:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3022/12318 [5:13:49<16:05:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3022/12318 [5:13:49<16:05:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3023/12318 [5:13:56<16:05:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3023/12318 [5:13:56<16:05:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3024/12318 [5:14:01<16:05:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3024/12318 [5:14:01<16:05:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3025/12318 [5:14:05<16:04:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3025/12318 [5:14:05<16:04:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3026/12318 [5:14:13<16:04:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3026/12318 [5:14:13<16:04:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3027/12318 [5:14:21<16:04:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3027/12318 [5:14:21<16:04:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3028/12318 [5:14:29<16:04:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3028/12318 [5:14:29<16:04:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3029/12318 [5:14:32<16:04:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3029/12318 [5:14:32<16:04:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3030/12318 [5:14:34<16:04:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3030/12318 [5:14:34<16:04:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3031/12318 [5:14:40<16:04:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3031/12318 [5:14:40<16:04:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3032/12318 [5:14:45<16:03:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3032/12318 [5:14:45<16:03:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3033/12318 [5:14:50<16:03:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3033/12318 [5:14:50<16:03:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3034/12318 [5:14:54<16:03:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3034/12318 [5:14:54<16:03:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3035/12318 [5:14:56<16:03:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3035/12318 [5:14:56<16:03:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3036/12318 [5:15:04<16:03:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3036/12318 [5:15:04<16:03:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3037/12318 [5:15:07<16:02:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3037/12318 [5:15:07<16:02:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3038/12318 [5:15:15<16:03:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3038/12318 [5:15:15<16:03:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3039/12318 [5:15:23<16:02:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3039/12318 [5:15:23<16:02:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3040/12318 [5:15:57<16:04:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3040/12318 [5:15:57<16:04:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3041/12318 [5:16:01<16:04:05,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3041/12318 [5:16:01<16:04:05,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3042/12318 [5:16:07<16:03:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3042/12318 [5:16:07<16:03:57,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3043/12318 [5:16:11<16:03:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3043/12318 [5:16:11<16:03:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3044/12318 [5:16:20<16:03:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3044/12318 [5:16:20<16:03:47,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3045/12318 [5:16:29<16:03:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3045/12318 [5:16:29<16:03:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3046/12318 [5:16:34<16:03:40,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3046/12318 [5:16:34<16:03:40,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3047/12318 [5:16:43<16:03:42,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3047/12318 [5:16:43<16:03:42,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3048/12318 [5:16:51<16:03:40,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3048/12318 [5:16:51<16:03:40,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3049/12318 [5:16:57<16:03:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3049/12318 [5:16:57<16:03:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3050/12318 [5:17:00<16:03:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3050/12318 [5:17:00<16:03:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3051/12318 [5:17:03<16:03:01,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3051/12318 [5:17:03<16:03:01,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3052/12318 [5:17:10<16:02:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3052/12318 [5:17:10<16:02:56,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3053/12318 [5:17:14<16:02:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3053/12318 [5:17:14<16:02:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3054/12318 [5:17:22<16:02:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3054/12318 [5:17:22<16:02:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3055/12318 [5:17:29<16:02:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3055/12318 [5:17:29<16:02:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3056/12318 [5:17:35<16:02:34,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3056/12318 [5:17:35<16:02:34,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3057/12318 [5:17:44<16:02:35,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3057/12318 [5:17:44<16:02:35,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3058/12318 [5:17:49<16:02:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3058/12318 [5:17:49<16:02:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3059/12318 [5:17:51<16:02:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3059/12318 [5:17:51<16:02:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3060/12318 [5:18:00<16:02:06,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3060/12318 [5:18:00<16:02:06,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3061/12318 [5:18:04<16:01:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3061/12318 [5:18:04<16:01:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3062/12318 [5:18:10<16:01:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3062/12318 [5:18:10<16:01:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3063/12318 [5:18:19<16:01:49,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3063/12318 [5:18:19<16:01:49,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3064/12318 [5:18:23<16:01:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3064/12318 [5:18:23<16:01:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3065/12318 [5:18:31<16:01:36,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3065/12318 [5:18:31<16:01:36,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3066/12318 [5:18:35<16:01:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3066/12318 [5:18:35<16:01:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3067/12318 [5:18:40<16:01:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3067/12318 [5:18:40<16:01:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3068/12318 [5:18:48<16:01:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3068/12318 [5:18:48<16:01:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3069/12318 [5:18:56<16:01:10,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3069/12318 [5:18:56<16:01:10,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3070/12318 [5:18:57<16:00:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3070/12318 [5:18:57<16:00:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3071/12318 [5:19:01<16:00:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3071/12318 [5:19:01<16:00:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3072/12318 [5:19:17<16:00:59,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3072/12318 [5:19:17<16:00:59,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3073/12318 [5:19:24<16:00:54,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3073/12318 [5:19:24<16:00:54,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3074/12318 [5:19:31<16:00:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3074/12318 [5:19:31<16:00:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3075/12318 [5:19:38<16:00:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3075/12318 [5:19:38<16:00:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3076/12318 [5:19:41<16:00:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3076/12318 [5:19:41<16:00:31,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3077/12318 [5:19:48<16:00:26,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3077/12318 [5:19:48<16:00:26,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3078/12318 [5:19:52<16:00:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▏| 3078/12318 [5:19:52<16:00:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3079/12318 [5:19:54<15:59:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▏| 3079/12318 [5:19:54<15:59:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3080/12318 [5:20:03<15:59:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3080/12318 [5:20:03<15:59:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3081/12318 [5:20:07<15:59:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3081/12318 [5:20:07<15:59:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3082/12318 [5:20:12<15:59:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3082/12318 [5:20:12<15:59:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3083/12318 [5:20:17<15:59:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3083/12318 [5:20:17<15:59:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3084/12318 [5:20:22<15:59:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3084/12318 [5:20:22<15:59:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3085/12318 [5:20:31<15:59:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3085/12318 [5:20:31<15:59:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3086/12318 [5:20:39<15:59:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3086/12318 [5:20:39<15:59:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3087/12318 [5:20:48<15:59:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3087/12318 [5:20:48<15:59:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3088/12318 [5:20:53<15:59:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3088/12318 [5:20:53<15:59:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3089/12318 [5:20:56<15:58:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3089/12318 [5:20:56<15:58:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3090/12318 [5:21:00<15:58:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3090/12318 [5:21:00<15:58:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3091/12318 [5:21:09<15:58:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3091/12318 [5:21:09<15:58:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3092/12318 [5:21:16<15:58:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3092/12318 [5:21:16<15:58:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3093/12318 [5:21:25<15:58:40,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3093/12318 [5:21:25<15:58:40,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3094/12318 [5:21:27<15:58:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3094/12318 [5:21:27<15:58:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3095/12318 [5:21:32<15:58:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3095/12318 [5:21:32<15:58:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3096/12318 [5:21:33<15:57:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3096/12318 [5:21:33<15:57:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3097/12318 [5:21:35<15:57:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3097/12318 [5:21:35<15:57:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3098/12318 [5:21:40<15:57:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3098/12318 [5:21:40<15:57:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3099/12318 [5:21:44<15:57:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3099/12318 [5:21:44<15:57:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3100/12318 [5:21:50<15:56:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3100/12318 [5:21:50<15:56:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3101/12318 [5:21:53<15:56:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3101/12318 [5:21:53<15:56:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3102/12318 [5:22:02<15:56:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3102/12318 [5:22:02<15:56:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3103/12318 [5:22:06<15:56:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3103/12318 [5:22:06<15:56:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3104/12318 [5:22:41<15:57:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3104/12318 [5:22:41<15:57:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3105/12318 [5:22:49<15:57:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3105/12318 [5:22:49<15:57:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3106/12318 [5:22:53<15:57:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3106/12318 [5:22:53<15:57:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3107/12318 [5:22:58<15:57:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3107/12318 [5:22:58<15:57:30,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3108/12318 [5:23:02<15:57:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3108/12318 [5:23:02<15:57:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3109/12318 [5:23:03<15:56:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3109/12318 [5:23:03<15:56:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3110/12318 [5:23:12<15:56:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3110/12318 [5:23:12<15:56:56,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3111/12318 [5:23:17<15:56:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3111/12318 [5:23:17<15:56:47,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3112/12318 [5:23:21<15:56:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3112/12318 [5:23:21<15:56:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3113/12318 [5:23:26<15:56:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3113/12318 [5:23:26<15:56:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3114/12318 [5:23:29<15:56:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3114/12318 [5:23:29<15:56:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3115/12318 [5:23:32<15:55:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3115/12318 [5:23:32<15:55:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3116/12318 [5:23:37<15:55:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3116/12318 [5:23:37<15:55:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3117/12318 [5:23:42<15:55:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3117/12318 [5:23:42<15:55:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3118/12318 [5:23:45<15:55:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3118/12318 [5:23:45<15:55:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3119/12318 [5:23:51<15:55:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3119/12318 [5:23:51<15:55:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3120/12318 [5:23:57<15:55:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3120/12318 [5:23:57<15:55:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3121/12318 [5:24:02<15:54:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3121/12318 [5:24:02<15:54:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3122/12318 [5:24:06<15:54:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3122/12318 [5:24:06<15:54:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3123/12318 [5:24:15<15:54:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3123/12318 [5:24:15<15:54:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3124/12318 [5:24:24<15:54:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3124/12318 [5:24:24<15:54:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3125/12318 [5:24:28<15:54:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3125/12318 [5:24:28<15:54:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3126/12318 [5:24:32<15:54:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3126/12318 [5:24:32<15:54:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3127/12318 [5:24:40<15:54:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3127/12318 [5:24:40<15:54:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3128/12318 [5:24:45<15:54:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3128/12318 [5:24:45<15:54:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3129/12318 [5:24:52<15:54:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3129/12318 [5:24:52<15:54:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3130/12318 [5:25:00<15:54:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3130/12318 [5:25:00<15:54:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3131/12318 [5:25:04<15:53:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3131/12318 [5:25:04<15:53:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3132/12318 [5:25:10<15:53:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3132/12318 [5:25:10<15:53:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3133/12318 [5:25:15<15:53:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3133/12318 [5:25:15<15:53:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3134/12318 [5:25:19<15:53:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3134/12318 [5:25:19<15:53:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3135/12318 [5:25:24<15:53:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3135/12318 [5:25:24<15:53:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3136/12318 [5:25:57<15:54:23,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3136/12318 [5:25:57<15:54:23,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3137/12318 [5:26:05<15:54:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3137/12318 [5:26:05<15:54:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3138/12318 [5:26:12<15:54:18,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3138/12318 [5:26:12<15:54:18,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3139/12318 [5:26:17<15:54:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3139/12318 [5:26:17<15:54:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3140/12318 [5:26:21<15:53:55,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3140/12318 [5:26:21<15:53:55,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  25%|▎| 3141/12318 [5:26:29<15:53:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  25%|▎| 3141/12318 [5:26:29<15:53:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3142/12318 [5:26:34<15:53:45,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3142/12318 [5:26:34<15:53:45,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3143/12318 [5:26:40<15:53:37,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3143/12318 [5:26:40<15:53:37,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3144/12318 [5:26:41<15:53:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3144/12318 [5:26:41<15:53:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3145/12318 [5:26:50<15:53:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3145/12318 [5:26:50<15:53:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3146/12318 [5:26:57<15:53:12,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3146/12318 [5:26:57<15:53:12,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3147/12318 [5:27:00<15:52:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3147/12318 [5:27:00<15:52:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3148/12318 [5:27:05<15:52:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3148/12318 [5:27:05<15:52:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3149/12318 [5:27:09<15:52:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3149/12318 [5:27:09<15:52:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3150/12318 [5:27:18<15:52:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3150/12318 [5:27:18<15:52:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3151/12318 [5:27:27<15:52:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3151/12318 [5:27:27<15:52:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3152/12318 [5:27:33<15:52:33,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3152/12318 [5:27:33<15:52:33,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3153/12318 [5:27:35<15:52:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3153/12318 [5:27:35<15:52:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3154/12318 [5:27:44<15:52:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3154/12318 [5:27:44<15:52:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3155/12318 [5:27:52<15:52:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3155/12318 [5:27:52<15:52:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3156/12318 [5:27:58<15:52:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3156/12318 [5:27:58<15:52:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3157/12318 [5:28:06<15:52:05,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3157/12318 [5:28:06<15:52:05,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3158/12318 [5:28:10<15:51:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3158/12318 [5:28:10<15:51:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3159/12318 [5:28:18<15:51:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3159/12318 [5:28:18<15:51:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3160/12318 [5:28:23<15:51:42,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3160/12318 [5:28:23<15:51:42,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3161/12318 [5:28:29<15:51:37,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3161/12318 [5:28:29<15:51:37,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3162/12318 [5:28:38<15:51:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3162/12318 [5:28:38<15:51:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3163/12318 [5:28:46<15:51:36,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3163/12318 [5:28:46<15:51:36,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3164/12318 [5:28:55<15:51:37,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3164/12318 [5:28:55<15:51:37,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3165/12318 [5:28:56<15:51:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3165/12318 [5:28:56<15:51:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3166/12318 [5:29:05<15:51:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3166/12318 [5:29:05<15:51:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3167/12318 [5:29:10<15:51:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3167/12318 [5:29:10<15:51:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3168/12318 [5:29:25<15:51:27,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3168/12318 [5:29:25<15:51:27,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3169/12318 [5:29:31<15:51:21,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3169/12318 [5:29:31<15:51:21,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3170/12318 [5:29:35<15:51:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3170/12318 [5:29:35<15:51:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3171/12318 [5:29:37<15:50:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3171/12318 [5:29:37<15:50:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3172/12318 [5:29:40<15:50:34,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3172/12318 [5:29:40<15:50:34,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3173/12318 [5:29:43<15:50:19,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3173/12318 [5:29:43<15:50:19,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3174/12318 [5:29:50<15:50:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3174/12318 [5:29:50<15:50:14,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3175/12318 [5:29:59<15:50:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3175/12318 [5:29:59<15:50:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3176/12318 [5:30:02<15:50:00,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3176/12318 [5:30:02<15:50:00,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3177/12318 [5:30:03<15:49:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3177/12318 [5:30:03<15:49:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3178/12318 [5:30:11<15:49:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3178/12318 [5:30:11<15:49:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3179/12318 [5:30:16<15:49:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3179/12318 [5:30:16<15:49:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3180/12318 [5:30:22<15:49:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3180/12318 [5:30:22<15:49:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3181/12318 [5:30:25<15:49:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3181/12318 [5:30:25<15:49:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3182/12318 [5:30:33<15:49:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3182/12318 [5:30:33<15:49:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3183/12318 [5:30:37<15:48:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3183/12318 [5:30:37<15:48:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3184/12318 [5:30:43<15:48:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3184/12318 [5:30:43<15:48:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3185/12318 [5:30:45<15:48:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3185/12318 [5:30:45<15:48:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3186/12318 [5:30:53<15:48:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3186/12318 [5:30:53<15:48:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3187/12318 [5:30:56<15:48:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3187/12318 [5:30:56<15:48:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3188/12318 [5:31:04<15:48:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3188/12318 [5:31:04<15:48:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3189/12318 [5:31:08<15:47:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3189/12318 [5:31:08<15:47:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3190/12318 [5:31:14<15:47:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3190/12318 [5:31:14<15:47:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3191/12318 [5:31:18<15:47:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3191/12318 [5:31:18<15:47:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3192/12318 [5:31:21<15:47:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3192/12318 [5:31:21<15:47:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3193/12318 [5:31:25<15:47:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3193/12318 [5:31:25<15:47:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3194/12318 [5:31:27<15:46:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3194/12318 [5:31:27<15:46:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3195/12318 [5:31:33<15:46:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3195/12318 [5:31:33<15:46:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3196/12318 [5:31:37<15:46:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3196/12318 [5:31:37<15:46:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3197/12318 [5:31:42<15:46:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3197/12318 [5:31:42<15:46:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3198/12318 [5:31:47<15:46:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3198/12318 [5:31:47<15:46:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3199/12318 [5:31:55<15:46:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3199/12318 [5:31:55<15:46:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3200/12318 [5:32:36<15:47:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3200/12318 [5:32:36<15:47:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3201/12318 [5:33:01<15:48:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3201/12318 [5:33:01<15:48:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3202/12318 [5:33:10<15:48:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3202/12318 [5:33:10<15:48:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3203/12318 [5:33:15<15:48:21,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3203/12318 [5:33:15<15:48:21,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3204/12318 [5:33:19<15:48:09,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3204/12318 [5:33:19<15:48:09,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3205/12318 [5:33:21<15:47:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3205/12318 [5:33:21<15:47:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3206/12318 [5:33:29<15:47:49,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3206/12318 [5:33:29<15:47:49,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3207/12318 [5:33:34<15:47:41,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3207/12318 [5:33:34<15:47:41,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3208/12318 [5:33:41<15:47:36,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3208/12318 [5:33:41<15:47:36,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3209/12318 [5:33:45<15:47:23,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3209/12318 [5:33:45<15:47:23,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3210/12318 [5:33:48<15:47:09,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3210/12318 [5:33:48<15:47:09,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3211/12318 [5:33:50<15:46:49,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3211/12318 [5:33:50<15:46:49,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3212/12318 [5:33:55<15:46:41,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3212/12318 [5:33:55<15:46:41,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3213/12318 [5:33:59<15:46:28,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3213/12318 [5:33:59<15:46:28,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3214/12318 [5:34:02<15:46:11,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3214/12318 [5:34:02<15:46:11,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3215/12318 [5:34:09<15:46:08,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3215/12318 [5:34:09<15:46:08,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3216/12318 [5:34:15<15:46:00,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3216/12318 [5:34:15<15:46:00,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3217/12318 [5:34:20<15:45:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3217/12318 [5:34:20<15:45:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3218/12318 [5:34:27<15:45:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3218/12318 [5:34:27<15:45:47,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3219/12318 [5:34:32<15:45:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3219/12318 [5:34:32<15:45:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3220/12318 [5:34:41<15:45:39,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3220/12318 [5:34:41<15:45:39,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3221/12318 [5:34:48<15:45:36,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3221/12318 [5:34:48<15:45:36,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3222/12318 [5:34:57<15:45:37,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3222/12318 [5:34:57<15:45:37,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3223/12318 [5:35:04<15:45:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3223/12318 [5:35:04<15:45:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3224/12318 [5:35:07<15:45:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3224/12318 [5:35:07<15:45:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3225/12318 [5:35:16<15:45:18,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3225/12318 [5:35:16<15:45:18,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3226/12318 [5:35:19<15:45:04,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3226/12318 [5:35:19<15:45:04,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3227/12318 [5:35:23<15:44:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3227/12318 [5:35:23<15:44:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3228/12318 [5:35:29<15:44:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3228/12318 [5:35:29<15:44:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3229/12318 [5:35:32<15:44:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3229/12318 [5:35:32<15:44:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3230/12318 [5:35:39<15:44:26,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3230/12318 [5:35:39<15:44:26,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3231/12318 [5:35:44<15:44:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3231/12318 [5:35:44<15:44:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3232/12318 [5:36:24<15:45:44,  6.25s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3232/12318 [5:36:24<15:45:44,  6.25s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3233/12318 [5:36:32<15:45:42,  6.25s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3233/12318 [5:36:32<15:45:42,  6.25s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3234/12318 [5:36:37<15:45:33,  6.25s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3234/12318 [5:36:37<15:45:33,  6.25s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3235/12318 [5:36:42<15:45:22,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3235/12318 [5:36:42<15:45:22,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3236/12318 [5:36:47<15:45:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3236/12318 [5:36:47<15:45:14,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3237/12318 [5:36:54<15:45:09,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3237/12318 [5:36:54<15:45:09,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3238/12318 [5:36:57<15:44:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3238/12318 [5:36:57<15:44:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3239/12318 [5:36:58<15:44:33,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3239/12318 [5:36:58<15:44:33,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3240/12318 [5:37:06<15:44:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3240/12318 [5:37:06<15:44:31,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3241/12318 [5:37:15<15:44:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3241/12318 [5:37:15<15:44:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3242/12318 [5:37:22<15:44:29,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3242/12318 [5:37:22<15:44:29,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3243/12318 [5:37:27<15:44:18,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3243/12318 [5:37:27<15:44:18,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3244/12318 [5:37:34<15:44:13,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3244/12318 [5:37:34<15:44:13,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3245/12318 [5:37:39<15:44:04,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3245/12318 [5:37:39<15:44:04,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3246/12318 [5:37:44<15:43:54,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3246/12318 [5:37:44<15:43:54,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3247/12318 [5:37:52<15:43:55,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3247/12318 [5:37:52<15:43:55,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3248/12318 [5:37:55<15:43:39,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3248/12318 [5:37:55<15:43:39,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3249/12318 [5:37:57<15:43:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3249/12318 [5:37:57<15:43:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3250/12318 [5:38:00<15:43:04,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3250/12318 [5:38:00<15:43:04,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3251/12318 [5:38:03<15:42:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3251/12318 [5:38:03<15:42:50,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3252/12318 [5:38:06<15:42:36,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3252/12318 [5:38:06<15:42:36,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3253/12318 [5:38:13<15:42:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3253/12318 [5:38:13<15:42:31,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3254/12318 [5:38:19<15:42:23,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3254/12318 [5:38:19<15:42:23,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3255/12318 [5:38:24<15:42:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3255/12318 [5:38:24<15:42:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3256/12318 [5:38:31<15:42:10,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3256/12318 [5:38:31<15:42:10,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3257/12318 [5:38:39<15:42:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3257/12318 [5:38:39<15:42:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3258/12318 [5:38:47<15:42:08,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3258/12318 [5:38:47<15:42:08,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3259/12318 [5:38:55<15:42:06,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3259/12318 [5:38:55<15:42:06,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3260/12318 [5:38:56<15:41:46,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3260/12318 [5:38:56<15:41:46,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3261/12318 [5:39:03<15:41:40,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3261/12318 [5:39:03<15:41:40,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3262/12318 [5:39:08<15:41:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3262/12318 [5:39:08<15:41:31,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3263/12318 [5:39:12<15:41:19,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3263/12318 [5:39:12<15:41:19,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  26%|▎| 3264/12318 [5:39:29<15:41:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  26%|▎| 3264/12318 [5:39:29<15:41:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3265/12318 [5:39:33<15:41:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3265/12318 [5:39:33<15:41:31,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3266/12318 [5:39:40<15:41:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3266/12318 [5:39:40<15:41:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3267/12318 [5:39:44<15:41:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3267/12318 [5:39:44<15:41:14,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3268/12318 [5:39:47<15:40:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3268/12318 [5:39:47<15:40:57,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3269/12318 [5:39:52<15:40:49,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3269/12318 [5:39:52<15:40:49,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3270/12318 [5:40:01<15:40:49,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3270/12318 [5:40:01<15:40:49,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3271/12318 [5:40:09<15:40:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3271/12318 [5:40:09<15:40:50,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3272/12318 [5:40:15<15:40:42,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3272/12318 [5:40:15<15:40:42,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3273/12318 [5:40:18<15:40:27,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3273/12318 [5:40:18<15:40:27,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3274/12318 [5:40:23<15:40:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3274/12318 [5:40:23<15:40:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3275/12318 [5:40:31<15:40:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3275/12318 [5:40:31<15:40:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3276/12318 [5:40:37<15:40:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3276/12318 [5:40:37<15:40:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3277/12318 [5:40:38<15:39:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3277/12318 [5:40:38<15:39:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3278/12318 [5:40:46<15:39:46,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3278/12318 [5:40:46<15:39:46,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3279/12318 [5:40:52<15:39:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3279/12318 [5:40:52<15:39:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3280/12318 [5:40:59<15:39:36,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3280/12318 [5:40:59<15:39:36,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3281/12318 [5:41:06<15:39:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3281/12318 [5:41:06<15:39:31,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3282/12318 [5:41:12<15:39:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3282/12318 [5:41:12<15:39:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3283/12318 [5:41:18<15:39:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3283/12318 [5:41:18<15:39:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3284/12318 [5:41:26<15:39:15,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3284/12318 [5:41:26<15:39:15,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3285/12318 [5:41:31<15:39:05,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3285/12318 [5:41:31<15:39:05,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3286/12318 [5:41:36<15:38:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3286/12318 [5:41:36<15:38:57,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3287/12318 [5:41:44<15:38:55,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3287/12318 [5:41:44<15:38:55,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3288/12318 [5:41:49<15:38:45,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3288/12318 [5:41:49<15:38:45,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3289/12318 [5:41:51<15:38:29,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3289/12318 [5:41:51<15:38:29,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3290/12318 [5:41:57<15:38:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3290/12318 [5:41:57<15:38:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3291/12318 [5:41:59<15:38:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3291/12318 [5:41:59<15:38:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3292/12318 [5:42:06<15:38:00,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3292/12318 [5:42:06<15:38:00,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3293/12318 [5:42:09<15:37:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3293/12318 [5:42:09<15:37:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3294/12318 [5:42:14<15:37:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3294/12318 [5:42:14<15:37:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3295/12318 [5:42:20<15:37:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3295/12318 [5:42:20<15:37:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3296/12318 [5:42:57<15:38:45,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3296/12318 [5:42:57<15:38:45,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3297/12318 [5:43:00<15:38:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3297/12318 [5:43:00<15:38:30,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3298/12318 [5:43:03<15:38:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3298/12318 [5:43:03<15:38:14,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3299/12318 [5:43:07<15:38:02,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3299/12318 [5:43:07<15:38:02,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3300/12318 [5:43:10<15:37:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3300/12318 [5:43:10<15:37:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3301/12318 [5:43:11<15:37:28,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3301/12318 [5:43:11<15:37:28,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3302/12318 [5:43:17<15:37:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3302/12318 [5:43:17<15:37:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3303/12318 [5:43:21<15:37:08,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3303/12318 [5:43:21<15:37:08,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3304/12318 [5:43:29<15:37:05,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3304/12318 [5:43:29<15:37:05,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3305/12318 [5:43:34<15:36:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3305/12318 [5:43:34<15:36:57,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3306/12318 [5:43:38<15:36:45,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3306/12318 [5:43:38<15:36:45,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3307/12318 [5:43:40<15:36:27,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3307/12318 [5:43:40<15:36:27,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3308/12318 [5:43:44<15:36:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3308/12318 [5:43:44<15:36:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3309/12318 [5:43:45<15:35:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3309/12318 [5:43:45<15:35:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3310/12318 [5:43:54<15:35:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3310/12318 [5:43:54<15:35:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3311/12318 [5:44:02<15:35:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3311/12318 [5:44:02<15:35:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3312/12318 [5:44:10<15:35:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3312/12318 [5:44:10<15:35:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3313/12318 [5:44:18<15:35:51,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3313/12318 [5:44:18<15:35:51,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3314/12318 [5:44:24<15:35:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3314/12318 [5:44:24<15:35:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3315/12318 [5:44:25<15:35:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3315/12318 [5:44:25<15:35:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3316/12318 [5:44:31<15:35:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3316/12318 [5:44:31<15:35:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3317/12318 [5:44:34<15:35:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3317/12318 [5:44:34<15:35:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3318/12318 [5:44:38<15:34:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3318/12318 [5:44:38<15:34:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3319/12318 [5:44:46<15:34:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3319/12318 [5:44:46<15:34:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3320/12318 [5:44:50<15:34:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3320/12318 [5:44:50<15:34:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3321/12318 [5:44:55<15:34:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3321/12318 [5:44:55<15:34:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3322/12318 [5:45:03<15:34:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3322/12318 [5:45:03<15:34:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3323/12318 [5:45:07<15:34:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3323/12318 [5:45:07<15:34:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3324/12318 [5:45:16<15:34:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3324/12318 [5:45:16<15:34:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3325/12318 [5:45:20<15:34:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3325/12318 [5:45:20<15:34:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3326/12318 [5:45:25<15:33:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3326/12318 [5:45:25<15:33:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3327/12318 [5:45:29<15:33:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3327/12318 [5:45:29<15:33:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3328/12318 [5:46:04<15:34:51,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3328/12318 [5:46:04<15:34:51,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3329/12318 [5:46:07<15:34:37,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3329/12318 [5:46:07<15:34:37,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3330/12318 [5:46:16<15:34:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3330/12318 [5:46:16<15:34:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3331/12318 [5:46:25<15:34:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3331/12318 [5:46:25<15:34:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3332/12318 [5:46:29<15:34:27,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3332/12318 [5:46:29<15:34:27,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3333/12318 [5:46:35<15:34:19,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3333/12318 [5:46:35<15:34:19,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3334/12318 [5:46:41<15:34:11,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3334/12318 [5:46:41<15:34:11,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3335/12318 [5:46:42<15:33:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3335/12318 [5:46:42<15:33:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3336/12318 [5:46:43<15:33:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3336/12318 [5:46:43<15:33:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3337/12318 [5:46:51<15:33:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3337/12318 [5:46:51<15:33:30,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3338/12318 [5:46:57<15:33:22,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3338/12318 [5:46:57<15:33:22,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3339/12318 [5:47:04<15:33:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3339/12318 [5:47:04<15:33:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3340/12318 [5:47:08<15:33:08,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3340/12318 [5:47:08<15:33:08,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3341/12318 [5:47:13<15:32:58,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3341/12318 [5:47:13<15:32:58,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3342/12318 [5:47:22<15:32:58,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3342/12318 [5:47:22<15:32:58,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3343/12318 [5:47:26<15:32:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3343/12318 [5:47:26<15:32:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3344/12318 [5:47:31<15:32:37,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3344/12318 [5:47:31<15:32:37,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3345/12318 [5:47:38<15:32:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3345/12318 [5:47:38<15:32:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3346/12318 [5:47:46<15:32:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3346/12318 [5:47:46<15:32:30,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3347/12318 [5:47:52<15:32:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3347/12318 [5:47:52<15:32:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3348/12318 [5:47:59<15:32:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3348/12318 [5:47:59<15:32:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3349/12318 [5:48:04<15:32:10,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3349/12318 [5:48:04<15:32:10,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3350/12318 [5:48:09<15:32:00,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3350/12318 [5:48:09<15:32:00,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3351/12318 [5:48:17<15:32:01,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3351/12318 [5:48:17<15:32:01,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3352/12318 [5:48:21<15:31:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3352/12318 [5:48:21<15:31:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3353/12318 [5:48:28<15:31:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3353/12318 [5:48:28<15:31:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3354/12318 [5:48:37<15:31:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3354/12318 [5:48:37<15:31:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3355/12318 [5:48:38<15:31:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3355/12318 [5:48:38<15:31:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3356/12318 [5:48:44<15:31:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3356/12318 [5:48:44<15:31:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3357/12318 [5:48:47<15:31:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3357/12318 [5:48:47<15:31:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3358/12318 [5:48:53<15:30:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3358/12318 [5:48:53<15:30:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3359/12318 [5:49:01<15:30:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3359/12318 [5:49:01<15:30:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3360/12318 [5:49:21<15:31:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3360/12318 [5:49:21<15:31:24,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3361/12318 [5:49:24<15:31:10,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3361/12318 [5:49:24<15:31:10,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3362/12318 [5:49:32<15:31:08,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3362/12318 [5:49:32<15:31:08,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3363/12318 [5:49:38<15:31:00,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3363/12318 [5:49:38<15:31:00,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3364/12318 [5:49:43<15:30:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3364/12318 [5:49:43<15:30:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3365/12318 [5:49:50<15:30:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3365/12318 [5:49:50<15:30:47,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3366/12318 [5:49:55<15:30:37,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3366/12318 [5:49:55<15:30:37,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3367/12318 [5:50:01<15:30:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3367/12318 [5:50:01<15:30:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3368/12318 [5:50:10<15:30:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3368/12318 [5:50:10<15:30:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3369/12318 [5:50:17<15:30:27,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3369/12318 [5:50:17<15:30:27,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3370/12318 [5:50:21<15:30:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3370/12318 [5:50:21<15:30:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3371/12318 [5:50:24<15:30:00,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3371/12318 [5:50:24<15:30:00,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3372/12318 [5:50:29<15:29:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3372/12318 [5:50:29<15:29:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3373/12318 [5:50:37<15:29:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3373/12318 [5:50:37<15:29:50,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3374/12318 [5:50:41<15:29:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3374/12318 [5:50:41<15:29:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3375/12318 [5:50:45<15:29:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3375/12318 [5:50:45<15:29:24,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3376/12318 [5:50:46<15:29:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3376/12318 [5:50:46<15:29:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3377/12318 [5:50:53<15:29:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3377/12318 [5:50:53<15:29:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3378/12318 [5:51:01<15:29:01,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3378/12318 [5:51:01<15:29:01,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3379/12318 [5:51:03<15:28:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3379/12318 [5:51:03<15:28:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3380/12318 [5:51:06<15:28:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3380/12318 [5:51:06<15:28:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3381/12318 [5:51:12<15:28:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3381/12318 [5:51:12<15:28:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3382/12318 [5:51:16<15:28:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3382/12318 [5:51:16<15:28:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3383/12318 [5:51:18<15:27:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3383/12318 [5:51:18<15:27:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3384/12318 [5:51:23<15:27:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3384/12318 [5:51:23<15:27:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3385/12318 [5:51:30<15:27:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3385/12318 [5:51:30<15:27:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3386/12318 [5:51:35<15:27:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3386/12318 [5:51:35<15:27:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  27%|▎| 3387/12318 [5:51:42<15:27:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  27%|▎| 3387/12318 [5:51:42<15:27:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3388/12318 [5:51:49<15:27:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3388/12318 [5:51:49<15:27:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3389/12318 [5:51:54<15:27:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3389/12318 [5:51:54<15:27:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3390/12318 [5:51:59<15:26:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3390/12318 [5:51:59<15:26:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3391/12318 [5:52:00<15:26:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3391/12318 [5:52:00<15:26:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3392/12318 [5:52:42<15:28:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3392/12318 [5:52:42<15:28:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3393/12318 [5:52:46<15:27:55,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3393/12318 [5:52:46<15:27:55,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3394/12318 [5:52:53<15:27:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3394/12318 [5:52:53<15:27:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3395/12318 [5:52:57<15:27:41,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3395/12318 [5:52:57<15:27:41,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3396/12318 [5:53:06<15:27:42,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3396/12318 [5:53:06<15:27:42,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3397/12318 [5:53:12<15:27:34,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3397/12318 [5:53:12<15:27:34,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3398/12318 [5:53:21<15:27:34,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3398/12318 [5:53:21<15:27:34,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3399/12318 [5:53:24<15:27:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3399/12318 [5:53:24<15:27:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3400/12318 [5:53:25<15:27:01,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3400/12318 [5:53:25<15:27:01,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3401/12318 [5:53:28<15:26:45,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3401/12318 [5:53:28<15:26:45,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3402/12318 [5:53:35<15:26:42,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3402/12318 [5:53:35<15:26:42,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3403/12318 [5:53:40<15:26:33,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3403/12318 [5:53:40<15:26:33,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3404/12318 [5:53:43<15:26:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3404/12318 [5:53:43<15:26:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3405/12318 [5:53:51<15:26:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3405/12318 [5:53:51<15:26:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3406/12318 [5:53:53<15:25:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3406/12318 [5:53:53<15:25:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3407/12318 [5:53:57<15:25:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3407/12318 [5:53:57<15:25:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3408/12318 [5:54:02<15:25:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3408/12318 [5:54:02<15:25:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3409/12318 [5:54:05<15:25:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3409/12318 [5:54:05<15:25:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3410/12318 [5:54:11<15:25:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3410/12318 [5:54:11<15:25:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3411/12318 [5:54:12<15:24:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3411/12318 [5:54:12<15:24:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3412/12318 [5:54:21<15:24:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3412/12318 [5:54:21<15:24:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3413/12318 [5:54:26<15:24:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3413/12318 [5:54:26<15:24:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3414/12318 [5:54:34<15:24:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3414/12318 [5:54:34<15:24:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3415/12318 [5:54:40<15:24:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3415/12318 [5:54:40<15:24:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3416/12318 [5:54:44<15:24:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3416/12318 [5:54:44<15:24:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3417/12318 [5:54:50<15:24:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3417/12318 [5:54:50<15:24:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3418/12318 [5:54:55<15:24:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3418/12318 [5:54:55<15:24:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3419/12318 [5:55:03<15:24:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3419/12318 [5:55:03<15:24:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3420/12318 [5:55:04<15:23:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3420/12318 [5:55:04<15:23:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3421/12318 [5:55:11<15:23:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3421/12318 [5:55:11<15:23:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3422/12318 [5:55:16<15:23:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3422/12318 [5:55:16<15:23:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3423/12318 [5:55:23<15:23:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3423/12318 [5:55:23<15:23:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3424/12318 [5:55:53<15:24:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3424/12318 [5:55:53<15:24:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3425/12318 [5:55:59<15:24:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3425/12318 [5:55:59<15:24:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3426/12318 [5:56:01<15:24:03,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3426/12318 [5:56:01<15:24:03,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3427/12318 [5:56:03<15:23:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3427/12318 [5:56:03<15:23:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3428/12318 [5:56:12<15:23:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3428/12318 [5:56:12<15:23:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3429/12318 [5:56:20<15:23:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3429/12318 [5:56:20<15:23:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3430/12318 [5:56:25<15:23:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3430/12318 [5:56:25<15:23:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3431/12318 [5:56:30<15:23:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3431/12318 [5:56:30<15:23:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3432/12318 [5:56:39<15:23:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3432/12318 [5:56:39<15:23:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3433/12318 [5:56:47<15:23:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3433/12318 [5:56:47<15:23:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3434/12318 [5:56:49<15:23:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3434/12318 [5:56:49<15:23:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3435/12318 [5:56:53<15:22:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3435/12318 [5:56:53<15:22:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3436/12318 [5:57:00<15:22:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3436/12318 [5:57:00<15:22:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3437/12318 [5:57:04<15:22:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3437/12318 [5:57:04<15:22:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3438/12318 [5:57:06<15:22:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3438/12318 [5:57:06<15:22:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3439/12318 [5:57:15<15:22:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3439/12318 [5:57:15<15:22:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3440/12318 [5:57:22<15:22:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3440/12318 [5:57:22<15:22:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3441/12318 [5:57:27<15:22:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3441/12318 [5:57:27<15:22:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3442/12318 [5:57:35<15:22:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3442/12318 [5:57:35<15:22:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3443/12318 [5:57:42<15:22:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3443/12318 [5:57:42<15:22:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3444/12318 [5:57:48<15:21:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3444/12318 [5:57:48<15:21:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3445/12318 [5:57:52<15:21:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3445/12318 [5:57:52<15:21:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3446/12318 [5:57:54<15:21:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3446/12318 [5:57:54<15:21:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3447/12318 [5:58:01<15:21:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3447/12318 [5:58:01<15:21:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3448/12318 [5:58:08<15:21:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3448/12318 [5:58:08<15:21:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3449/12318 [5:58:12<15:21:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3449/12318 [5:58:12<15:21:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3450/12318 [5:58:19<15:21:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3450/12318 [5:58:19<15:21:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3451/12318 [5:58:20<15:20:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3451/12318 [5:58:20<15:20:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3452/12318 [5:58:28<15:20:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3452/12318 [5:58:28<15:20:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3453/12318 [5:58:35<15:20:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3453/12318 [5:58:35<15:20:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3454/12318 [5:58:37<15:20:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3454/12318 [5:58:37<15:20:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3455/12318 [5:58:45<15:20:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3455/12318 [5:58:45<15:20:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3456/12318 [5:59:16<15:21:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3456/12318 [5:59:16<15:21:14,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3457/12318 [5:59:24<15:21:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3457/12318 [5:59:24<15:21:14,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3458/12318 [5:59:29<15:21:03,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3458/12318 [5:59:29<15:21:03,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3459/12318 [5:59:37<15:21:04,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3459/12318 [5:59:37<15:21:04,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3460/12318 [5:59:42<15:20:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3460/12318 [5:59:42<15:20:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3461/12318 [5:59:47<15:20:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3461/12318 [5:59:47<15:20:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3462/12318 [5:59:49<15:20:26,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3462/12318 [5:59:49<15:20:26,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3463/12318 [5:59:58<15:20:26,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3463/12318 [5:59:58<15:20:26,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3464/12318 [5:59:59<15:20:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3464/12318 [5:59:59<15:20:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3465/12318 [6:00:02<15:19:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3465/12318 [6:00:02<15:19:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3466/12318 [6:00:09<15:19:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3466/12318 [6:00:09<15:19:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3467/12318 [6:00:15<15:19:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3467/12318 [6:00:15<15:19:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3468/12318 [6:00:22<15:19:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3468/12318 [6:00:22<15:19:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3469/12318 [6:00:25<15:19:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3469/12318 [6:00:25<15:19:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3470/12318 [6:00:27<15:19:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3470/12318 [6:00:27<15:19:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3471/12318 [6:00:32<15:18:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3471/12318 [6:00:32<15:18:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3472/12318 [6:00:35<15:18:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3472/12318 [6:00:35<15:18:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3473/12318 [6:00:40<15:18:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3473/12318 [6:00:40<15:18:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3474/12318 [6:00:45<15:18:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3474/12318 [6:00:45<15:18:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3475/12318 [6:00:50<15:18:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3475/12318 [6:00:50<15:18:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3476/12318 [6:00:53<15:18:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3476/12318 [6:00:53<15:18:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3477/12318 [6:00:56<15:17:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3477/12318 [6:00:56<15:17:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3478/12318 [6:01:05<15:17:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3478/12318 [6:01:05<15:17:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3479/12318 [6:01:10<15:17:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3479/12318 [6:01:10<15:17:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3480/12318 [6:01:11<15:17:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3480/12318 [6:01:11<15:17:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3481/12318 [6:01:12<15:16:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3481/12318 [6:01:12<15:16:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3482/12318 [6:01:16<15:16:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3482/12318 [6:01:16<15:16:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3483/12318 [6:01:17<15:16:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3483/12318 [6:01:17<15:16:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3484/12318 [6:01:26<15:16:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3484/12318 [6:01:26<15:16:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3485/12318 [6:01:33<15:16:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3485/12318 [6:01:33<15:16:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3486/12318 [6:01:39<15:16:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3486/12318 [6:01:39<15:16:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3487/12318 [6:01:47<15:16:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3487/12318 [6:01:47<15:16:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3488/12318 [6:02:25<15:17:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3488/12318 [6:02:25<15:17:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3489/12318 [6:02:32<15:17:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3489/12318 [6:02:32<15:17:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3490/12318 [6:02:40<15:17:23,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3490/12318 [6:02:40<15:17:23,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3491/12318 [6:02:42<15:17:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3491/12318 [6:02:42<15:17:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3492/12318 [6:02:47<15:16:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3492/12318 [6:02:47<15:16:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3493/12318 [6:02:49<15:16:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3493/12318 [6:02:49<15:16:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3494/12318 [6:02:54<15:16:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3494/12318 [6:02:54<15:16:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3495/12318 [6:02:59<15:16:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3495/12318 [6:02:59<15:16:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3496/12318 [6:03:03<15:16:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3496/12318 [6:03:03<15:16:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3497/12318 [6:03:10<15:16:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3497/12318 [6:03:10<15:16:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3498/12318 [6:03:13<15:15:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3498/12318 [6:03:13<15:15:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3499/12318 [6:03:16<15:15:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3499/12318 [6:03:16<15:15:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3500/12318 [6:03:22<15:15:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3500/12318 [6:03:22<15:15:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3501/12318 [6:03:24<15:15:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3501/12318 [6:03:24<15:15:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3502/12318 [6:03:26<15:14:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3502/12318 [6:03:26<15:14:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3503/12318 [6:03:29<15:14:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3503/12318 [6:03:29<15:14:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3504/12318 [6:03:32<15:14:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3504/12318 [6:03:32<15:14:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3505/12318 [6:03:36<15:14:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3505/12318 [6:03:36<15:14:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3506/12318 [6:03:45<15:14:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3506/12318 [6:03:45<15:14:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3507/12318 [6:03:51<15:14:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3507/12318 [6:03:51<15:14:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3508/12318 [6:03:58<15:14:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3508/12318 [6:03:58<15:14:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3509/12318 [6:04:04<15:13:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3509/12318 [6:04:04<15:13:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  28%|▎| 3510/12318 [6:04:08<15:13:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  28%|▎| 3510/12318 [6:04:08<15:13:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3511/12318 [6:04:15<15:13:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3511/12318 [6:04:15<15:13:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3512/12318 [6:04:21<15:13:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3512/12318 [6:04:21<15:13:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3513/12318 [6:04:26<15:13:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3513/12318 [6:04:26<15:13:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3514/12318 [6:04:30<15:13:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3514/12318 [6:04:30<15:13:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3515/12318 [6:04:34<15:13:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3515/12318 [6:04:34<15:13:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3516/12318 [6:04:41<15:12:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3516/12318 [6:04:41<15:12:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3517/12318 [6:04:45<15:12:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3517/12318 [6:04:45<15:12:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3518/12318 [6:04:54<15:12:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3518/12318 [6:04:54<15:12:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3519/12318 [6:05:01<15:12:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3519/12318 [6:05:01<15:12:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3520/12318 [6:05:28<15:13:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3520/12318 [6:05:28<15:13:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3521/12318 [6:05:35<15:13:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3521/12318 [6:05:35<15:13:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3522/12318 [6:05:37<15:13:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3522/12318 [6:05:37<15:13:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3523/12318 [6:05:41<15:12:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3523/12318 [6:05:41<15:12:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3524/12318 [6:05:44<15:12:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3524/12318 [6:05:44<15:12:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3525/12318 [6:05:47<15:12:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3525/12318 [6:05:47<15:12:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3526/12318 [6:05:55<15:12:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3526/12318 [6:05:55<15:12:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3527/12318 [6:05:59<15:12:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3527/12318 [6:05:59<15:12:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3528/12318 [6:06:04<15:12:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3528/12318 [6:06:04<15:12:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3529/12318 [6:06:09<15:11:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3529/12318 [6:06:09<15:11:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3530/12318 [6:06:17<15:11:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3530/12318 [6:06:17<15:11:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3531/12318 [6:06:26<15:11:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3531/12318 [6:06:26<15:11:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3532/12318 [6:06:29<15:11:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3532/12318 [6:06:29<15:11:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3533/12318 [6:06:35<15:11:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3533/12318 [6:06:35<15:11:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3534/12318 [6:06:39<15:11:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3534/12318 [6:06:39<15:11:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3535/12318 [6:06:45<15:11:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3535/12318 [6:06:45<15:11:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3536/12318 [6:06:48<15:11:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3536/12318 [6:06:48<15:11:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3537/12318 [6:06:54<15:10:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3537/12318 [6:06:54<15:10:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3538/12318 [6:07:02<15:10:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3538/12318 [6:07:02<15:10:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3539/12318 [6:07:08<15:10:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3539/12318 [6:07:08<15:10:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3540/12318 [6:07:17<15:10:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3540/12318 [6:07:17<15:10:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3541/12318 [6:07:20<15:10:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3541/12318 [6:07:20<15:10:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3542/12318 [6:07:25<15:10:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3542/12318 [6:07:25<15:10:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3543/12318 [6:07:32<15:10:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3543/12318 [6:07:32<15:10:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3544/12318 [6:07:39<15:10:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3544/12318 [6:07:39<15:10:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3545/12318 [6:07:46<15:10:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3545/12318 [6:07:46<15:10:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3546/12318 [6:07:53<15:10:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3546/12318 [6:07:53<15:10:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3547/12318 [6:07:54<15:09:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3547/12318 [6:07:54<15:09:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3548/12318 [6:07:57<15:09:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3548/12318 [6:07:57<15:09:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3549/12318 [6:08:03<15:09:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3549/12318 [6:08:03<15:09:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3550/12318 [6:08:12<15:09:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3550/12318 [6:08:12<15:09:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3551/12318 [6:08:16<15:09:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3551/12318 [6:08:16<15:09:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3552/12318 [6:08:48<15:10:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3552/12318 [6:08:48<15:10:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3553/12318 [6:08:50<15:09:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3553/12318 [6:08:50<15:09:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3554/12318 [6:08:59<15:09:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|���| 3554/12318 [6:08:59<15:09:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3555/12318 [6:09:07<15:09:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3555/12318 [6:09:07<15:09:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3556/12318 [6:09:12<15:09:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3556/12318 [6:09:12<15:09:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3557/12318 [6:09:17<15:09:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3557/12318 [6:09:17<15:09:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3558/12318 [6:09:23<15:09:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3558/12318 [6:09:23<15:09:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3559/12318 [6:09:28<15:09:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3559/12318 [6:09:28<15:09:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3560/12318 [6:09:29<15:09:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3560/12318 [6:09:29<15:09:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3561/12318 [6:09:33<15:08:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3561/12318 [6:09:33<15:08:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3562/12318 [6:09:42<15:08:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3562/12318 [6:09:42<15:08:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3563/12318 [6:09:49<15:08:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3563/12318 [6:09:49<15:08:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3564/12318 [6:09:53<15:08:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3564/12318 [6:09:53<15:08:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3565/12318 [6:09:59<15:08:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3565/12318 [6:09:59<15:08:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3566/12318 [6:10:08<15:08:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3566/12318 [6:10:08<15:08:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3567/12318 [6:10:12<15:08:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3567/12318 [6:10:12<15:08:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3568/12318 [6:10:18<15:08:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3568/12318 [6:10:18<15:08:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3569/12318 [6:10:22<15:07:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3569/12318 [6:10:22<15:07:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3570/12318 [6:10:23<15:07:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3570/12318 [6:10:23<15:07:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3571/12318 [6:10:31<15:07:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3571/12318 [6:10:31<15:07:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3572/12318 [6:10:38<15:07:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3572/12318 [6:10:38<15:07:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3573/12318 [6:10:42<15:07:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3573/12318 [6:10:42<15:07:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3574/12318 [6:10:45<15:07:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3574/12318 [6:10:45<15:07:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3575/12318 [6:10:54<15:07:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3575/12318 [6:10:54<15:07:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3576/12318 [6:11:03<15:07:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3576/12318 [6:11:03<15:07:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3577/12318 [6:11:06<15:06:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3577/12318 [6:11:06<15:06:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3578/12318 [6:11:07<15:06:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3578/12318 [6:11:07<15:06:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3579/12318 [6:11:10<15:06:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3579/12318 [6:11:10<15:06:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3580/12318 [6:11:13<15:06:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3580/12318 [6:11:13<15:06:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3581/12318 [6:11:18<15:05:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3581/12318 [6:11:18<15:05:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3582/12318 [6:11:24<15:05:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3582/12318 [6:11:24<15:05:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3583/12318 [6:11:31<15:05:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3583/12318 [6:11:31<15:05:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3584/12318 [6:11:59<15:06:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3584/12318 [6:11:59<15:06:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3585/12318 [6:12:05<15:06:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3585/12318 [6:12:05<15:06:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3586/12318 [6:12:09<15:06:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3586/12318 [6:12:09<15:06:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3587/12318 [6:12:18<15:06:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3587/12318 [6:12:18<15:06:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3588/12318 [6:12:20<15:05:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3588/12318 [6:12:20<15:05:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3589/12318 [6:12:23<15:05:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3589/12318 [6:12:23<15:05:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3590/12318 [6:12:29<15:05:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3590/12318 [6:12:29<15:05:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3591/12318 [6:12:38<15:05:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3591/12318 [6:12:38<15:05:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3592/12318 [6:12:42<15:05:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3592/12318 [6:12:42<15:05:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3593/12318 [6:12:47<15:05:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3593/12318 [6:12:47<15:05:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3594/12318 [6:12:55<15:05:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3594/12318 [6:12:55<15:05:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3595/12318 [6:13:00<15:05:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3595/12318 [6:13:00<15:05:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3596/12318 [6:13:06<15:04:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3596/12318 [6:13:06<15:04:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3597/12318 [6:13:10<15:04:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3597/12318 [6:13:10<15:04:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3598/12318 [6:13:18<15:04:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3598/12318 [6:13:18<15:04:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3599/12318 [6:13:27<15:04:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3599/12318 [6:13:27<15:04:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3600/12318 [6:13:33<15:04:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3600/12318 [6:13:33<15:04:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3601/12318 [6:13:41<15:04:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3601/12318 [6:13:41<15:04:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3602/12318 [6:13:47<15:04:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3602/12318 [6:13:47<15:04:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3603/12318 [6:13:53<15:04:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3603/12318 [6:13:53<15:04:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3604/12318 [6:13:58<15:04:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3604/12318 [6:13:58<15:04:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3605/12318 [6:14:05<15:04:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3605/12318 [6:14:05<15:04:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3606/12318 [6:14:12<15:04:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3606/12318 [6:14:12<15:04:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3607/12318 [6:14:20<15:04:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3607/12318 [6:14:20<15:04:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3608/12318 [6:14:24<15:03:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3608/12318 [6:14:24<15:03:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3609/12318 [6:14:32<15:03:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3609/12318 [6:14:32<15:03:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3610/12318 [6:14:36<15:03:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3610/12318 [6:14:36<15:03:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3611/12318 [6:14:41<15:03:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3611/12318 [6:14:41<15:03:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3612/12318 [6:14:48<15:03:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3612/12318 [6:14:48<15:03:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3613/12318 [6:14:54<15:03:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3613/12318 [6:14:54<15:03:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3614/12318 [6:15:02<15:03:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3614/12318 [6:15:02<15:03:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3615/12318 [6:15:03<15:02:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3615/12318 [6:15:03<15:02:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3616/12318 [6:15:20<15:03:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3616/12318 [6:15:20<15:03:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3617/12318 [6:15:23<15:03:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3617/12318 [6:15:23<15:03:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3618/12318 [6:15:30<15:02:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3618/12318 [6:15:30<15:02:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3619/12318 [6:15:31<15:02:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3619/12318 [6:15:31<15:02:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3620/12318 [6:15:38<15:02:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3620/12318 [6:15:38<15:02:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3621/12318 [6:15:43<15:02:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3621/12318 [6:15:43<15:02:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3622/12318 [6:15:51<15:02:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3622/12318 [6:15:51<15:02:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3623/12318 [6:15:53<15:02:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3623/12318 [6:15:53<15:02:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3624/12318 [6:15:57<15:01:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3624/12318 [6:15:57<15:01:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3625/12318 [6:16:04<15:01:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3625/12318 [6:16:04<15:01:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3626/12318 [6:16:05<15:01:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3626/12318 [6:16:05<15:01:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3627/12318 [6:16:10<15:01:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3627/12318 [6:16:10<15:01:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3628/12318 [6:16:18<15:01:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3628/12318 [6:16:18<15:01:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3629/12318 [6:16:24<15:01:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3629/12318 [6:16:24<15:01:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3630/12318 [6:16:28<15:01:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3630/12318 [6:16:28<15:01:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3631/12318 [6:16:36<15:01:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3631/12318 [6:16:36<15:01:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3632/12318 [6:16:38<15:00:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3632/12318 [6:16:38<15:00:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  29%|▎| 3633/12318 [6:16:41<15:00:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  29%|▎| 3633/12318 [6:16:41<15:00:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3634/12318 [6:16:42<15:00:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3634/12318 [6:16:42<15:00:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3635/12318 [6:16:49<15:00:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3635/12318 [6:16:49<15:00:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3636/12318 [6:16:53<14:59:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3636/12318 [6:16:53<14:59:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3637/12318 [6:16:59<14:59:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3637/12318 [6:16:59<14:59:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3638/12318 [6:17:07<14:59:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3638/12318 [6:17:07<14:59:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3639/12318 [6:17:16<14:59:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3639/12318 [6:17:16<14:59:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3640/12318 [6:17:22<14:59:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3640/12318 [6:17:22<14:59:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3641/12318 [6:17:27<14:59:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3641/12318 [6:17:27<14:59:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3642/12318 [6:17:32<14:59:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3642/12318 [6:17:32<14:59:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3643/12318 [6:17:41<14:59:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3643/12318 [6:17:41<14:59:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3644/12318 [6:17:50<14:59:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3644/12318 [6:17:50<14:59:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3645/12318 [6:17:53<14:59:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3645/12318 [6:17:53<14:59:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3646/12318 [6:17:58<14:59:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3646/12318 [6:17:58<14:59:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3647/12318 [6:18:00<14:58:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3647/12318 [6:18:00<14:58:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3648/12318 [6:18:26<14:59:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3648/12318 [6:18:26<14:59:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3649/12318 [6:18:33<14:59:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3649/12318 [6:18:33<14:59:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3650/12318 [6:18:40<14:59:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3650/12318 [6:18:40<14:59:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3651/12318 [6:18:42<14:59:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3651/12318 [6:18:42<14:59:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3652/12318 [6:18:48<14:58:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3652/12318 [6:18:48<14:58:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3653/12318 [6:18:57<14:58:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3653/12318 [6:18:57<14:58:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3654/12318 [6:19:02<14:58:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3654/12318 [6:19:02<14:58:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3655/12318 [6:19:10<14:58:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3655/12318 [6:19:10<14:58:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3656/12318 [6:19:16<14:58:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3656/12318 [6:19:16<14:58:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3657/12318 [6:19:24<14:58:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3657/12318 [6:19:24<14:58:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3658/12318 [6:19:32<14:58:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3658/12318 [6:19:32<14:58:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3659/12318 [6:19:33<14:58:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3659/12318 [6:19:33<14:58:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3660/12318 [6:19:38<14:58:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3660/12318 [6:19:38<14:58:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3661/12318 [6:19:45<14:58:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3661/12318 [6:19:45<14:58:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3662/12318 [6:19:50<14:57:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3662/12318 [6:19:50<14:57:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3663/12318 [6:19:57<14:57:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3663/12318 [6:19:57<14:57:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3664/12318 [6:20:05<14:57:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3664/12318 [6:20:05<14:57:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3665/12318 [6:20:10<14:57:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3665/12318 [6:20:10<14:57:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3666/12318 [6:20:15<14:57:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3666/12318 [6:20:15<14:57:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3667/12318 [6:20:23<14:57:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3667/12318 [6:20:23<14:57:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3668/12318 [6:20:24<14:57:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3668/12318 [6:20:24<14:57:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3669/12318 [6:20:32<14:57:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3669/12318 [6:20:32<14:57:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3670/12318 [6:20:40<14:57:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3670/12318 [6:20:40<14:57:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3671/12318 [6:20:48<14:57:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3671/12318 [6:20:48<14:57:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3672/12318 [6:20:50<14:56:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3672/12318 [6:20:50<14:56:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3673/12318 [6:20:56<14:56:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3673/12318 [6:20:56<14:56:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3674/12318 [6:21:03<14:56:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3674/12318 [6:21:03<14:56:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3675/12318 [6:21:08<14:56:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3675/12318 [6:21:08<14:56:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3676/12318 [6:21:15<14:56:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3676/12318 [6:21:15<14:56:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3677/12318 [6:21:18<14:56:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3677/12318 [6:21:18<14:56:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3678/12318 [6:21:23<14:55:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3678/12318 [6:21:23<14:55:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3679/12318 [6:21:32<14:55:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3679/12318 [6:21:32<14:55:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3680/12318 [6:21:49<14:56:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3680/12318 [6:21:49<14:56:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3681/12318 [6:21:54<14:56:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3681/12318 [6:21:54<14:56:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3682/12318 [6:22:01<14:56:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3682/12318 [6:22:01<14:56:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3683/12318 [6:22:09<14:56:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3683/12318 [6:22:09<14:56:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3684/12318 [6:22:12<14:55:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3684/12318 [6:22:12<14:55:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3685/12318 [6:22:17<14:55:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3685/12318 [6:22:17<14:55:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3686/12318 [6:22:19<14:55:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3686/12318 [6:22:19<14:55:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3687/12318 [6:22:26<14:55:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3687/12318 [6:22:26<14:55:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3688/12318 [6:22:32<14:55:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3688/12318 [6:22:32<14:55:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3689/12318 [6:22:36<14:54:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3689/12318 [6:22:36<14:54:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3690/12318 [6:22:42<14:54:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3690/12318 [6:22:42<14:54:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3691/12318 [6:22:47<14:54:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3691/12318 [6:22:47<14:54:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3692/12318 [6:22:50<14:54:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3692/12318 [6:22:50<14:54:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3693/12318 [6:22:55<14:54:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3693/12318 [6:22:55<14:54:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3694/12318 [6:23:04<14:54:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3694/12318 [6:23:04<14:54:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3695/12318 [6:23:13<14:54:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3695/12318 [6:23:13<14:54:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3696/12318 [6:23:15<14:54:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3696/12318 [6:23:15<14:54:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3697/12318 [6:23:20<14:53:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3697/12318 [6:23:20<14:53:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3698/12318 [6:23:29<14:53:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3698/12318 [6:23:29<14:53:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3699/12318 [6:23:35<14:53:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3699/12318 [6:23:35<14:53:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3700/12318 [6:23:40<14:53:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3700/12318 [6:23:40<14:53:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3701/12318 [6:23:48<14:53:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3701/12318 [6:23:48<14:53:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3702/12318 [6:23:55<14:53:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3702/12318 [6:23:55<14:53:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3703/12318 [6:24:02<14:53:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3703/12318 [6:24:02<14:53:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3704/12318 [6:24:07<14:53:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3704/12318 [6:24:07<14:53:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3705/12318 [6:24:10<14:53:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3705/12318 [6:24:10<14:53:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3706/12318 [6:24:15<14:52:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3706/12318 [6:24:15<14:52:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3707/12318 [6:24:22<14:52:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3707/12318 [6:24:22<14:52:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3708/12318 [6:24:31<14:52:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3708/12318 [6:24:31<14:52:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3709/12318 [6:24:33<14:52:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3709/12318 [6:24:33<14:52:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3710/12318 [6:24:41<14:52:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3710/12318 [6:24:41<14:52:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3711/12318 [6:24:44<14:52:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3711/12318 [6:24:44<14:52:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3712/12318 [6:25:02<14:52:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3712/12318 [6:25:02<14:52:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3713/12318 [6:25:05<14:52:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3713/12318 [6:25:05<14:52:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3714/12318 [6:25:13<14:52:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3714/12318 [6:25:13<14:52:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3715/12318 [6:25:17<14:52:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3715/12318 [6:25:17<14:52:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3716/12318 [6:25:22<14:52:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3716/12318 [6:25:22<14:52:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3717/12318 [6:25:28<14:51:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3717/12318 [6:25:28<14:51:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3718/12318 [6:25:31<14:51:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3718/12318 [6:25:31<14:51:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3719/12318 [6:25:37<14:51:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3719/12318 [6:25:37<14:51:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3720/12318 [6:25:41<14:51:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3720/12318 [6:25:41<14:51:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3721/12318 [6:25:48<14:51:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3721/12318 [6:25:48<14:51:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3722/12318 [6:25:53<14:51:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3722/12318 [6:25:53<14:51:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3723/12318 [6:25:57<14:51:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3723/12318 [6:25:57<14:51:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3724/12318 [6:26:01<14:50:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3724/12318 [6:26:01<14:50:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3725/12318 [6:26:05<14:50:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3725/12318 [6:26:05<14:50:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3726/12318 [6:26:12<14:50:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3726/12318 [6:26:12<14:50:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3727/12318 [6:26:18<14:50:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3727/12318 [6:26:18<14:50:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3728/12318 [6:26:27<14:50:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3728/12318 [6:26:27<14:50:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3729/12318 [6:26:33<14:50:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3729/12318 [6:26:33<14:50:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3730/12318 [6:26:34<14:50:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3730/12318 [6:26:34<14:50:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3731/12318 [6:26:42<14:50:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3731/12318 [6:26:42<14:50:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3732/12318 [6:26:49<14:49:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3732/12318 [6:26:49<14:49:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3733/12318 [6:26:56<14:49:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3733/12318 [6:26:56<14:49:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3734/12318 [6:27:01<14:49:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3734/12318 [6:27:01<14:49:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3735/12318 [6:27:08<14:49:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3735/12318 [6:27:08<14:49:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3736/12318 [6:27:15<14:49:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3736/12318 [6:27:15<14:49:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3737/12318 [6:27:23<14:49:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3737/12318 [6:27:23<14:49:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3738/12318 [6:27:28<14:49:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3738/12318 [6:27:28<14:49:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3739/12318 [6:27:33<14:49:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3739/12318 [6:27:33<14:49:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3740/12318 [6:27:38<14:49:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3740/12318 [6:27:38<14:49:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3741/12318 [6:27:43<14:48:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3741/12318 [6:27:43<14:48:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3742/12318 [6:27:52<14:48:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3742/12318 [6:27:52<14:48:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3743/12318 [6:27:57<14:48:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3743/12318 [6:27:57<14:48:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3744/12318 [6:28:14<14:49:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3744/12318 [6:28:14<14:49:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3745/12318 [6:28:18<14:48:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3745/12318 [6:28:18<14:48:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3746/12318 [6:28:27<14:48:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3746/12318 [6:28:27<14:48:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3747/12318 [6:28:31<14:48:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3747/12318 [6:28:31<14:48:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3748/12318 [6:28:40<14:48:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3748/12318 [6:28:40<14:48:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3749/12318 [6:28:46<14:48:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3749/12318 [6:28:46<14:48:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3750/12318 [6:28:51<14:48:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3750/12318 [6:28:51<14:48:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3751/12318 [6:28:54<14:48:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3751/12318 [6:28:54<14:48:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3752/12318 [6:28:57<14:48:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3752/12318 [6:28:57<14:48:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3753/12318 [6:29:05<14:47:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3753/12318 [6:29:05<14:47:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3754/12318 [6:29:11<14:47:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3754/12318 [6:29:11<14:47:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3755/12318 [6:29:17<14:47:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3755/12318 [6:29:17<14:47:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  30%|▎| 3756/12318 [6:29:22<14:47:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  30%|▎| 3756/12318 [6:29:22<14:47:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3757/12318 [6:29:27<14:47:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3757/12318 [6:29:27<14:47:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3758/12318 [6:29:35<14:47:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3758/12318 [6:29:35<14:47:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3759/12318 [6:29:42<14:47:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3759/12318 [6:29:42<14:47:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3760/12318 [6:29:51<14:47:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3760/12318 [6:29:51<14:47:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3761/12318 [6:29:56<14:47:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3761/12318 [6:29:56<14:47:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3762/12318 [6:30:01<14:47:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3762/12318 [6:30:01<14:47:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3763/12318 [6:30:07<14:46:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3763/12318 [6:30:07<14:46:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3764/12318 [6:30:08<14:46:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3764/12318 [6:30:08<14:46:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3765/12318 [6:30:10<14:46:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3765/12318 [6:30:10<14:46:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3766/12318 [6:30:16<14:46:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3766/12318 [6:30:16<14:46:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3767/12318 [6:30:18<14:45:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3767/12318 [6:30:18<14:45:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3768/12318 [6:30:23<14:45:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3768/12318 [6:30:23<14:45:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3769/12318 [6:30:28<14:45:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3769/12318 [6:30:28<14:45:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3770/12318 [6:30:32<14:45:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3770/12318 [6:30:32<14:45:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3771/12318 [6:30:39<14:45:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3771/12318 [6:30:39<14:45:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3772/12318 [6:30:41<14:45:10,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3772/12318 [6:30:41<14:45:10,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3773/12318 [6:30:47<14:45:03,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3773/12318 [6:30:47<14:45:03,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3774/12318 [6:30:51<14:44:52,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3774/12318 [6:30:51<14:44:52,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3775/12318 [6:30:58<14:44:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3775/12318 [6:30:58<14:44:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3776/12318 [6:31:26<14:45:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3776/12318 [6:31:26<14:45:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3777/12318 [6:31:35<14:45:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3777/12318 [6:31:35<14:45:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3778/12318 [6:31:39<14:45:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3778/12318 [6:31:39<14:45:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3779/12318 [6:31:46<14:45:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3779/12318 [6:31:46<14:45:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3780/12318 [6:31:48<14:44:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3780/12318 [6:31:48<14:44:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3781/12318 [6:31:55<14:44:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3781/12318 [6:31:55<14:44:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3782/12318 [6:31:59<14:44:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3782/12318 [6:31:59<14:44:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3783/12318 [6:32:05<14:44:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3783/12318 [6:32:05<14:44:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3784/12318 [6:32:12<14:44:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3784/12318 [6:32:12<14:44:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3785/12318 [6:32:16<14:44:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3785/12318 [6:32:16<14:44:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3786/12318 [6:32:24<14:44:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3786/12318 [6:32:24<14:44:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3787/12318 [6:32:33<14:44:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3787/12318 [6:32:33<14:44:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3788/12318 [6:32:35<14:44:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3788/12318 [6:32:35<14:44:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3789/12318 [6:32:39<14:43:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3789/12318 [6:32:39<14:43:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3790/12318 [6:32:42<14:43:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3790/12318 [6:32:42<14:43:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3791/12318 [6:32:46<14:43:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3791/12318 [6:32:46<14:43:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3792/12318 [6:32:47<14:43:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3792/12318 [6:32:47<14:43:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3793/12318 [6:32:55<14:43:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3793/12318 [6:32:55<14:43:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3794/12318 [6:32:56<14:42:49,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3794/12318 [6:32:56<14:42:49,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3795/12318 [6:33:03<14:42:44,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3795/12318 [6:33:03<14:42:44,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3796/12318 [6:33:08<14:42:35,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3796/12318 [6:33:08<14:42:35,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3797/12318 [6:33:09<14:42:19,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3797/12318 [6:33:09<14:42:19,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3798/12318 [6:33:15<14:42:11,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3798/12318 [6:33:15<14:42:11,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3799/12318 [6:33:22<14:42:06,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3799/12318 [6:33:22<14:42:06,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3800/12318 [6:33:23<14:41:49,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3800/12318 [6:33:23<14:41:49,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3801/12318 [6:33:25<14:41:33,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3801/12318 [6:33:25<14:41:33,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3802/12318 [6:33:32<14:41:29,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3802/12318 [6:33:32<14:41:29,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3803/12318 [6:33:34<14:41:13,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3803/12318 [6:33:34<14:41:13,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3804/12318 [6:33:42<14:41:11,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3804/12318 [6:33:42<14:41:11,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3805/12318 [6:33:43<14:40:53,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3805/12318 [6:33:43<14:40:53,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3806/12318 [6:33:48<14:40:44,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3806/12318 [6:33:48<14:40:44,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3807/12318 [6:33:55<14:40:39,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3807/12318 [6:33:55<14:40:39,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3808/12318 [6:34:31<14:41:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3808/12318 [6:34:31<14:41:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3809/12318 [6:34:40<14:41:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3809/12318 [6:34:40<14:41:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3810/12318 [6:34:46<14:41:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3810/12318 [6:34:46<14:41:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3811/12318 [6:34:51<14:41:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3811/12318 [6:34:51<14:41:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3812/12318 [6:34:56<14:41:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3812/12318 [6:34:56<14:41:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3813/12318 [6:34:59<14:41:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3813/12318 [6:34:59<14:41:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3814/12318 [6:35:06<14:40:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3814/12318 [6:35:06<14:40:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3815/12318 [6:35:11<14:40:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3815/12318 [6:35:11<14:40:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3816/12318 [6:35:20<14:40:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3816/12318 [6:35:20<14:40:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3817/12318 [6:35:22<14:40:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3817/12318 [6:35:22<14:40:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3818/12318 [6:35:24<14:40:17,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3818/12318 [6:35:24<14:40:17,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3819/12318 [6:35:29<14:40:09,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3819/12318 [6:35:29<14:40:09,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3820/12318 [6:35:37<14:40:06,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3820/12318 [6:35:37<14:40:06,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3821/12318 [6:35:46<14:40:05,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3821/12318 [6:35:46<14:40:05,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3822/12318 [6:35:51<14:39:57,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3822/12318 [6:35:51<14:39:57,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3823/12318 [6:35:58<14:39:52,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3823/12318 [6:35:58<14:39:52,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3824/12318 [6:36:02<14:39:41,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3824/12318 [6:36:02<14:39:41,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3825/12318 [6:36:07<14:39:33,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3825/12318 [6:36:07<14:39:33,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3826/12318 [6:36:16<14:39:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3826/12318 [6:36:16<14:39:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3827/12318 [6:36:18<14:39:16,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3827/12318 [6:36:18<14:39:16,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3828/12318 [6:36:22<14:39:07,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3828/12318 [6:36:22<14:39:07,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3829/12318 [6:36:24<14:38:50,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3829/12318 [6:36:24<14:38:50,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3830/12318 [6:36:25<14:38:33,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3830/12318 [6:36:25<14:38:33,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3831/12318 [6:36:26<14:38:16,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3831/12318 [6:36:26<14:38:16,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3832/12318 [6:36:29<14:38:01,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3832/12318 [6:36:29<14:38:01,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3833/12318 [6:36:32<14:37:47,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3833/12318 [6:36:32<14:37:47,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3834/12318 [6:36:33<14:37:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3834/12318 [6:36:33<14:37:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3835/12318 [6:36:35<14:37:16,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3835/12318 [6:36:35<14:37:16,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3836/12318 [6:36:37<14:37:00,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3836/12318 [6:36:37<14:37:00,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3837/12318 [6:36:45<14:36:57,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3837/12318 [6:36:45<14:36:57,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3838/12318 [6:36:49<14:36:47,  6.20s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3838/12318 [6:36:49<14:36:47,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3839/12318 [6:36:57<14:36:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3839/12318 [6:36:57<14:36:44,  6.20s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3840/12318 [6:38:14<14:39:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3840/12318 [6:38:14<14:39:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3841/12318 [6:38:19<14:39:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3841/12318 [6:38:19<14:39:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3842/12318 [6:38:23<14:38:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3842/12318 [6:38:23<14:38:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3843/12318 [6:38:31<14:38:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3843/12318 [6:38:31<14:38:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3844/12318 [6:38:33<14:38:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3844/12318 [6:38:33<14:38:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3845/12318 [6:38:40<14:38:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3845/12318 [6:38:40<14:38:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3846/12318 [6:38:41<14:38:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3846/12318 [6:38:41<14:38:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3847/12318 [6:38:44<14:38:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3847/12318 [6:38:44<14:38:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3848/12318 [6:38:47<14:37:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3848/12318 [6:38:47<14:37:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3849/12318 [6:38:54<14:37:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3849/12318 [6:38:54<14:37:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3850/12318 [6:39:03<14:37:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3850/12318 [6:39:03<14:37:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3851/12318 [6:39:12<14:37:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3851/12318 [6:39:12<14:37:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3852/12318 [6:39:15<14:37:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3852/12318 [6:39:15<14:37:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3853/12318 [6:39:22<14:37:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3853/12318 [6:39:22<14:37:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3854/12318 [6:39:28<14:37:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3854/12318 [6:39:28<14:37:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3855/12318 [6:39:36<14:37:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3855/12318 [6:39:36<14:37:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3856/12318 [6:39:45<14:37:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3856/12318 [6:39:45<14:37:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3857/12318 [6:39:50<14:37:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3857/12318 [6:39:50<14:37:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3858/12318 [6:39:56<14:37:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3858/12318 [6:39:56<14:37:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3859/12318 [6:40:04<14:36:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3859/12318 [6:40:04<14:36:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3860/12318 [6:40:09<14:36:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3860/12318 [6:40:09<14:36:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3861/12318 [6:40:14<14:36:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3861/12318 [6:40:14<14:36:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3862/12318 [6:40:20<14:36:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3862/12318 [6:40:20<14:36:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3863/12318 [6:40:25<14:36:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3863/12318 [6:40:25<14:36:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3864/12318 [6:40:27<14:36:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3864/12318 [6:40:27<14:36:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3865/12318 [6:40:28<14:35:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3865/12318 [6:40:28<14:35:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3866/12318 [6:40:35<14:35:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3866/12318 [6:40:35<14:35:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3867/12318 [6:40:39<14:35:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3867/12318 [6:40:39<14:35:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3868/12318 [6:40:47<14:35:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3868/12318 [6:40:48<14:35:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3869/12318 [6:40:56<14:35:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3869/12318 [6:40:56<14:35:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3870/12318 [6:40:59<14:35:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3870/12318 [6:40:59<14:35:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3871/12318 [6:41:04<14:35:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3871/12318 [6:41:04<14:35:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3872/12318 [6:41:33<14:35:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3872/12318 [6:41:33<14:35:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3873/12318 [6:41:37<14:35:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3873/12318 [6:41:37<14:35:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3874/12318 [6:41:42<14:35:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3874/12318 [6:41:42<14:35:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3875/12318 [6:41:46<14:35:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3875/12318 [6:41:46<14:35:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3876/12318 [6:41:47<14:35:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3876/12318 [6:41:47<14:35:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3877/12318 [6:41:48<14:34:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3877/12318 [6:41:48<14:34:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3878/12318 [6:41:51<14:34:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3878/12318 [6:41:51<14:34:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3879/12318 [6:42:00<14:34:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3879/12318 [6:42:00<14:34:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  31%|▎| 3880/12318 [6:42:09<14:34:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  31%|▎| 3880/12318 [6:42:09<14:34:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3881/12318 [6:42:13<14:34:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3881/12318 [6:42:13<14:34:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3882/12318 [6:42:19<14:34:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3882/12318 [6:42:19<14:34:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3883/12318 [6:42:25<14:34:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3883/12318 [6:42:25<14:34:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3884/12318 [6:42:31<14:34:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3884/12318 [6:42:31<14:34:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3885/12318 [6:42:39<14:34:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3885/12318 [6:42:39<14:34:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3886/12318 [6:42:42<14:33:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3886/12318 [6:42:42<14:33:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3887/12318 [6:42:47<14:33:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3887/12318 [6:42:47<14:33:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3888/12318 [6:42:52<14:33:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3888/12318 [6:42:52<14:33:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3889/12318 [6:42:59<14:33:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3889/12318 [6:42:59<14:33:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3890/12318 [6:43:04<14:33:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3890/12318 [6:43:04<14:33:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3891/12318 [6:43:08<14:33:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3891/12318 [6:43:08<14:33:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3892/12318 [6:43:16<14:33:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3892/12318 [6:43:16<14:33:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3893/12318 [6:43:22<14:32:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3893/12318 [6:43:22<14:32:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3894/12318 [6:43:26<14:32:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3894/12318 [6:43:26<14:32:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3895/12318 [6:43:30<14:32:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3895/12318 [6:43:30<14:32:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3896/12318 [6:43:36<14:32:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3896/12318 [6:43:36<14:32:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3897/12318 [6:43:41<14:32:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3897/12318 [6:43:41<14:32:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3898/12318 [6:43:44<14:32:06,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3898/12318 [6:43:44<14:32:06,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3899/12318 [6:43:47<14:31:53,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3899/12318 [6:43:47<14:31:53,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3900/12318 [6:43:55<14:31:50,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3900/12318 [6:43:55<14:31:50,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3901/12318 [6:43:57<14:31:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3901/12318 [6:43:57<14:31:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3902/12318 [6:44:06<14:31:36,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3902/12318 [6:44:06<14:31:36,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3903/12318 [6:44:13<14:31:31,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3903/12318 [6:44:13<14:31:31,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3904/12318 [6:44:58<14:32:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3904/12318 [6:44:58<14:32:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3905/12318 [6:45:04<14:32:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3905/12318 [6:45:04<14:32:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3906/12318 [6:45:10<14:32:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3906/12318 [6:45:10<14:32:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3907/12318 [6:45:17<14:32:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3907/12318 [6:45:17<14:32:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3908/12318 [6:45:23<14:32:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3908/12318 [6:45:23<14:32:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3909/12318 [6:45:28<14:32:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3909/12318 [6:45:28<14:32:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3910/12318 [6:45:37<14:32:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3910/12318 [6:45:37<14:32:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3911/12318 [6:45:45<14:32:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3911/12318 [6:45:45<14:32:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3912/12318 [6:45:50<14:32:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3912/12318 [6:45:50<14:32:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3913/12318 [6:45:55<14:31:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3913/12318 [6:45:55<14:31:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3914/12318 [6:46:01<14:31:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3914/12318 [6:46:01<14:31:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3915/12318 [6:46:04<14:31:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3915/12318 [6:46:04<14:31:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3916/12318 [6:46:12<14:31:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3916/12318 [6:46:12<14:31:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3917/12318 [6:46:16<14:31:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3917/12318 [6:46:16<14:31:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3918/12318 [6:46:21<14:31:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3918/12318 [6:46:21<14:31:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3919/12318 [6:46:26<14:31:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3919/12318 [6:46:26<14:31:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3920/12318 [6:46:34<14:31:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3920/12318 [6:46:34<14:31:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3921/12318 [6:46:38<14:30:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3921/12318 [6:46:38<14:30:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3922/12318 [6:46:46<14:30:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3922/12318 [6:46:46<14:30:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3923/12318 [6:46:49<14:30:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3923/12318 [6:46:49<14:30:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3924/12318 [6:46:55<14:30:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3924/12318 [6:46:55<14:30:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3925/12318 [6:46:59<14:30:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3925/12318 [6:46:59<14:30:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3926/12318 [6:47:02<14:30:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3926/12318 [6:47:02<14:30:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3927/12318 [6:47:09<14:29:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3927/12318 [6:47:09<14:29:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3928/12318 [6:47:11<14:29:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3928/12318 [6:47:11<14:29:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3929/12318 [6:47:19<14:29:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3929/12318 [6:47:19<14:29:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3930/12318 [6:47:24<14:29:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3930/12318 [6:47:24<14:29:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3931/12318 [6:47:27<14:29:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3931/12318 [6:47:27<14:29:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3932/12318 [6:47:35<14:29:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3932/12318 [6:47:35<14:29:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3933/12318 [6:47:37<14:29:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3933/12318 [6:47:37<14:29:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3934/12318 [6:47:44<14:28:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3934/12318 [6:47:44<14:28:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3935/12318 [6:47:46<14:28:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3935/12318 [6:47:46<14:28:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3936/12318 [6:48:19<14:29:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3936/12318 [6:48:19<14:29:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3937/12318 [6:48:22<14:29:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3937/12318 [6:48:22<14:29:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3938/12318 [6:48:29<14:29:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3938/12318 [6:48:29<14:29:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3939/12318 [6:48:36<14:29:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3939/12318 [6:48:36<14:29:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3940/12318 [6:48:40<14:29:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3940/12318 [6:48:40<14:29:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3941/12318 [6:48:43<14:28:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3941/12318 [6:48:43<14:28:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3942/12318 [6:48:52<14:28:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3942/12318 [6:48:52<14:28:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3943/12318 [6:49:00<14:28:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3943/12318 [6:49:00<14:28:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3944/12318 [6:49:02<14:28:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3944/12318 [6:49:02<14:28:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3945/12318 [6:49:09<14:28:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3945/12318 [6:49:09<14:28:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3946/12318 [6:49:14<14:28:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3946/12318 [6:49:14<14:28:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3947/12318 [6:49:19<14:28:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3947/12318 [6:49:19<14:28:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3948/12318 [6:49:22<14:27:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3948/12318 [6:49:22<14:27:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3949/12318 [6:49:30<14:27:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3949/12318 [6:49:30<14:27:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3950/12318 [6:49:33<14:27:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3950/12318 [6:49:33<14:27:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3951/12318 [6:49:40<14:27:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3951/12318 [6:49:40<14:27:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3952/12318 [6:49:49<14:27:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3952/12318 [6:49:49<14:27:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3953/12318 [6:49:50<14:27:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3953/12318 [6:49:50<14:27:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3954/12318 [6:49:53<14:27:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3954/12318 [6:49:53<14:27:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3955/12318 [6:50:00<14:26:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3955/12318 [6:50:00<14:26:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3956/12318 [6:50:05<14:26:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3956/12318 [6:50:05<14:26:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3957/12318 [6:50:07<14:26:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3957/12318 [6:50:07<14:26:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3958/12318 [6:50:10<14:26:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3958/12318 [6:50:10<14:26:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3959/12318 [6:50:18<14:26:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3959/12318 [6:50:18<14:26:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3960/12318 [6:50:25<14:26:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3960/12318 [6:50:25<14:26:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3961/12318 [6:50:32<14:26:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3961/12318 [6:50:32<14:26:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3962/12318 [6:50:37<14:26:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3962/12318 [6:50:37<14:26:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3963/12318 [6:50:43<14:25:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3963/12318 [6:50:43<14:25:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3964/12318 [6:50:46<14:25:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3964/12318 [6:50:46<14:25:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3965/12318 [6:50:51<14:25:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3965/12318 [6:50:51<14:25:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3966/12318 [6:50:57<14:25:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3966/12318 [6:50:57<14:25:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3967/12318 [6:51:01<14:25:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3967/12318 [6:51:01<14:25:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3968/12318 [6:51:28<14:25:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3968/12318 [6:51:28<14:25:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3969/12318 [6:51:36<14:25:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3969/12318 [6:51:36<14:25:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3970/12318 [6:51:42<14:25:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3970/12318 [6:51:42<14:25:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3971/12318 [6:51:45<14:25:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3971/12318 [6:51:45<14:25:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3972/12318 [6:51:49<14:25:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3972/12318 [6:51:49<14:25:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3973/12318 [6:51:50<14:25:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3973/12318 [6:51:50<14:25:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3974/12318 [6:51:53<14:24:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3974/12318 [6:51:53<14:24:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3975/12318 [6:52:00<14:24:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3975/12318 [6:52:00<14:24:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3976/12318 [6:52:06<14:24:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3976/12318 [6:52:06<14:24:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3977/12318 [6:52:10<14:24:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3977/12318 [6:52:10<14:24:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3978/12318 [6:52:19<14:24:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3978/12318 [6:52:19<14:24:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3979/12318 [6:52:22<14:24:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3979/12318 [6:52:22<14:24:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3980/12318 [6:52:23<14:23:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3980/12318 [6:52:23<14:23:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3981/12318 [6:52:27<14:23:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3981/12318 [6:52:27<14:23:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3982/12318 [6:52:31<14:23:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3982/12318 [6:52:31<14:23:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3983/12318 [6:52:36<14:23:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3983/12318 [6:52:36<14:23:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3984/12318 [6:52:45<14:23:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3984/12318 [6:52:45<14:23:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3985/12318 [6:52:48<14:23:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3985/12318 [6:52:48<14:23:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3986/12318 [6:52:54<14:23:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3986/12318 [6:52:54<14:23:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3987/12318 [6:53:01<14:23:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3987/12318 [6:53:01<14:23:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3988/12318 [6:53:04<14:22:48,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3988/12318 [6:53:04<14:22:48,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3989/12318 [6:53:05<14:22:32,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3989/12318 [6:53:05<14:22:32,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3990/12318 [6:53:14<14:22:31,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3990/12318 [6:53:14<14:22:31,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3991/12318 [6:53:18<14:22:21,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3991/12318 [6:53:18<14:22:21,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3992/12318 [6:53:24<14:22:13,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3992/12318 [6:53:24<14:22:13,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3993/12318 [6:53:28<14:22:02,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3993/12318 [6:53:28<14:22:02,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3994/12318 [6:53:36<14:21:59,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3994/12318 [6:53:36<14:21:59,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3995/12318 [6:53:44<14:21:58,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3995/12318 [6:53:44<14:21:58,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3996/12318 [6:53:51<14:21:53,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3996/12318 [6:53:51<14:21:53,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3997/12318 [6:53:55<14:21:42,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3997/12318 [6:53:55<14:21:42,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3998/12318 [6:53:57<14:21:28,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3998/12318 [6:53:57<14:21:28,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 3999/12318 [6:53:59<14:21:12,  6.21s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 3999/12318 [6:53:59<14:21:12,  6.21s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 4000/12318 [6:54:58<14:22:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 4000/12318 [6:54:58<14:22:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 4001/12318 [6:55:26<14:23:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 4001/12318 [6:55:26<14:23:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 4002/12318 [6:55:31<14:23:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 4002/12318 [6:55:31<14:23:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  32%|▎| 4003/12318 [6:55:37<14:23:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  32%|▎| 4003/12318 [6:55:37<14:23:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4004/12318 [6:55:43<14:23:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4004/12318 [6:55:43<14:23:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4005/12318 [6:55:48<14:23:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4005/12318 [6:55:48<14:23:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4006/12318 [6:55:52<14:22:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4006/12318 [6:55:52<14:22:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4007/12318 [6:55:55<14:22:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4007/12318 [6:55:55<14:22:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4008/12318 [6:55:59<14:22:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4008/12318 [6:55:59<14:22:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4009/12318 [6:56:07<14:22:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4009/12318 [6:56:07<14:22:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4010/12318 [6:56:16<14:22:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4010/12318 [6:56:16<14:22:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4011/12318 [6:56:20<14:22:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4011/12318 [6:56:20<14:22:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4012/12318 [6:56:27<14:22:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4012/12318 [6:56:27<14:22:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4013/12318 [6:56:35<14:22:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4013/12318 [6:56:35<14:22:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4014/12318 [6:56:43<14:22:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4014/12318 [6:56:43<14:22:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4015/12318 [6:56:47<14:21:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4015/12318 [6:56:47<14:21:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4016/12318 [6:56:56<14:21:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4016/12318 [6:56:56<14:21:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4017/12318 [6:57:00<14:21:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4017/12318 [6:57:00<14:21:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4018/12318 [6:57:06<14:21:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4018/12318 [6:57:06<14:21:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4019/12318 [6:57:14<14:21:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4019/12318 [6:57:14<14:21:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4020/12318 [6:57:20<14:21:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4020/12318 [6:57:20<14:21:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4021/12318 [6:57:26<14:21:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4021/12318 [6:57:26<14:21:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4022/12318 [6:57:35<14:21:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4022/12318 [6:57:35<14:21:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4023/12318 [6:57:43<14:21:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4023/12318 [6:57:43<14:21:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4024/12318 [6:57:46<14:21:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4024/12318 [6:57:46<14:21:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4025/12318 [6:57:53<14:21:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4025/12318 [6:57:53<14:21:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4026/12318 [6:57:57<14:20:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4026/12318 [6:57:57<14:20:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4027/12318 [6:58:00<14:20:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4027/12318 [6:58:00<14:20:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4028/12318 [6:58:05<14:20:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4028/12318 [6:58:05<14:20:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4029/12318 [6:58:07<14:20:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4029/12318 [6:58:07<14:20:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4030/12318 [6:58:11<14:20:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4030/12318 [6:58:11<14:20:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4031/12318 [6:58:12<14:19:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4031/12318 [6:58:12<14:19:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4032/12318 [6:58:52<14:20:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4032/12318 [6:58:52<14:20:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4033/12318 [6:59:00<14:20:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4033/12318 [6:59:00<14:20:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4034/12318 [6:59:08<14:20:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4034/12318 [6:59:08<14:20:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4035/12318 [6:59:11<14:20:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4035/12318 [6:59:11<14:20:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4036/12318 [6:59:17<14:20:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4036/12318 [6:59:17<14:20:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4037/12318 [6:59:22<14:20:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4037/12318 [6:59:22<14:20:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4038/12318 [6:59:28<14:20:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4038/12318 [6:59:28<14:20:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4039/12318 [6:59:34<14:20:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4039/12318 [6:59:34<14:20:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4040/12318 [6:59:38<14:19:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4040/12318 [6:59:38<14:19:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4041/12318 [6:59:42<14:19:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4041/12318 [6:59:42<14:19:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4042/12318 [6:59:45<14:19:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4042/12318 [6:59:45<14:19:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4043/12318 [6:59:53<14:19:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4043/12318 [6:59:53<14:19:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4044/12318 [6:59:59<14:19:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4044/12318 [6:59:59<14:19:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4045/12318 [7:00:04<14:19:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4045/12318 [7:00:04<14:19:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4046/12318 [7:00:07<14:18:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4046/12318 [7:00:07<14:18:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4047/12318 [7:00:10<14:18:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4047/12318 [7:00:10<14:18:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4048/12318 [7:00:16<14:18:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4048/12318 [7:00:16<14:18:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4049/12318 [7:00:25<14:18:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4049/12318 [7:00:25<14:18:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4050/12318 [7:00:26<14:18:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4050/12318 [7:00:26<14:18:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4051/12318 [7:00:33<14:18:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4051/12318 [7:00:33<14:18:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4052/12318 [7:00:37<14:18:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4052/12318 [7:00:37<14:18:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4053/12318 [7:00:43<14:17:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4053/12318 [7:00:43<14:17:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4054/12318 [7:00:52<14:17:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4054/12318 [7:00:52<14:17:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4055/12318 [7:00:55<14:17:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4055/12318 [7:00:55<14:17:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4056/12318 [7:00:59<14:17:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4056/12318 [7:00:59<14:17:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4057/12318 [7:01:03<14:17:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4057/12318 [7:01:03<14:17:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4058/12318 [7:01:06<14:17:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4058/12318 [7:01:06<14:17:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4059/12318 [7:01:14<14:17:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4059/12318 [7:01:14<14:17:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4060/12318 [7:01:22<14:17:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4060/12318 [7:01:22<14:17:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4061/12318 [7:01:25<14:16:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4061/12318 [7:01:25<14:16:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4062/12318 [7:01:34<14:16:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4062/12318 [7:01:34<14:16:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4063/12318 [7:01:36<14:16:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4063/12318 [7:01:36<14:16:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4064/12318 [7:02:09<14:17:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4064/12318 [7:02:09<14:17:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4065/12318 [7:02:17<14:17:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4065/12318 [7:02:17<14:17:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4066/12318 [7:02:20<14:17:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4066/12318 [7:02:20<14:17:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4067/12318 [7:02:28<14:17:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4067/12318 [7:02:28<14:17:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4068/12318 [7:02:33<14:16:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4068/12318 [7:02:33<14:16:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4069/12318 [7:02:35<14:16:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4069/12318 [7:02:35<14:16:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4070/12318 [7:02:40<14:16:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4070/12318 [7:02:40<14:16:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4071/12318 [7:02:48<14:16:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4071/12318 [7:02:48<14:16:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4072/12318 [7:02:55<14:16:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4072/12318 [7:02:55<14:16:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4073/12318 [7:03:00<14:16:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4073/12318 [7:03:00<14:16:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4074/12318 [7:03:05<14:16:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4074/12318 [7:03:05<14:16:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4075/12318 [7:03:13<14:16:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4075/12318 [7:03:13<14:16:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4076/12318 [7:03:19<14:15:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4076/12318 [7:03:19<14:15:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4077/12318 [7:03:25<14:15:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4077/12318 [7:03:25<14:15:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4078/12318 [7:03:34<14:15:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4078/12318 [7:03:34<14:15:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4079/12318 [7:03:37<14:15:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4079/12318 [7:03:37<14:15:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4080/12318 [7:03:41<14:15:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4080/12318 [7:03:41<14:15:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4081/12318 [7:03:47<14:15:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4081/12318 [7:03:47<14:15:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4082/12318 [7:03:50<14:15:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4082/12318 [7:03:50<14:15:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4083/12318 [7:03:59<14:15:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4083/12318 [7:03:59<14:15:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4084/12318 [7:04:05<14:15:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4084/12318 [7:04:05<14:15:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4085/12318 [7:04:14<14:15:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4085/12318 [7:04:14<14:15:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4086/12318 [7:04:23<14:15:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4086/12318 [7:04:23<14:15:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4087/12318 [7:04:26<14:14:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4087/12318 [7:04:26<14:14:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4088/12318 [7:04:28<14:14:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4088/12318 [7:04:28<14:14:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4089/12318 [7:04:33<14:14:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4089/12318 [7:04:33<14:14:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4090/12318 [7:04:42<14:14:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4090/12318 [7:04:42<14:14:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4091/12318 [7:04:47<14:14:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4091/12318 [7:04:47<14:14:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4092/12318 [7:04:52<14:14:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4092/12318 [7:04:52<14:14:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4093/12318 [7:04:56<14:13:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4093/12318 [7:04:56<14:13:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4094/12318 [7:05:05<14:13:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4094/12318 [7:05:05<14:13:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4095/12318 [7:05:07<14:13:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4095/12318 [7:05:07<14:13:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4096/12318 [7:05:20<14:13:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4096/12318 [7:05:20<14:13:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4097/12318 [7:05:29<14:13:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4097/12318 [7:05:29<14:13:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4098/12318 [7:05:38<14:13:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4098/12318 [7:05:38<14:13:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4099/12318 [7:05:41<14:13:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4099/12318 [7:05:41<14:13:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4100/12318 [7:05:47<14:13:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4100/12318 [7:05:47<14:13:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4101/12318 [7:05:55<14:13:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4101/12318 [7:05:55<14:13:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4102/12318 [7:06:01<14:13:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4102/12318 [7:06:01<14:13:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4103/12318 [7:06:05<14:13:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4103/12318 [7:06:05<14:13:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4104/12318 [7:06:09<14:12:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4104/12318 [7:06:09<14:12:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4105/12318 [7:06:14<14:12:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4105/12318 [7:06:14<14:12:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4106/12318 [7:06:18<14:12:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4106/12318 [7:06:18<14:12:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4107/12318 [7:06:23<14:12:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4107/12318 [7:06:23<14:12:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4108/12318 [7:06:28<14:12:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4108/12318 [7:06:28<14:12:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4109/12318 [7:06:33<14:12:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4109/12318 [7:06:33<14:12:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4110/12318 [7:06:38<14:12:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4110/12318 [7:06:38<14:12:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4111/12318 [7:06:43<14:11:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4111/12318 [7:06:43<14:11:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4112/12318 [7:06:48<14:11:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4112/12318 [7:06:48<14:11:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4113/12318 [7:06:56<14:11:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4113/12318 [7:06:56<14:11:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4114/12318 [7:07:01<14:11:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4114/12318 [7:07:01<14:11:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4115/12318 [7:07:06<14:11:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4115/12318 [7:07:06<14:11:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4116/12318 [7:07:13<14:11:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4116/12318 [7:07:13<14:11:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4117/12318 [7:07:18<14:11:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4117/12318 [7:07:18<14:11:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4118/12318 [7:07:23<14:11:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4118/12318 [7:07:23<14:11:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4119/12318 [7:07:28<14:10:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4119/12318 [7:07:28<14:10:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4120/12318 [7:07:36<14:10:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4120/12318 [7:07:36<14:10:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4121/12318 [7:07:42<14:10:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4121/12318 [7:07:42<14:10:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4122/12318 [7:07:43<14:10:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4122/12318 [7:07:43<14:10:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4123/12318 [7:07:51<14:10:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4123/12318 [7:07:51<14:10:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4124/12318 [7:07:55<14:10:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4124/12318 [7:07:55<14:10:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4125/12318 [7:08:00<14:10:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4125/12318 [7:08:00<14:10:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  33%|▎| 4126/12318 [7:08:08<14:10:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  33%|▎| 4126/12318 [7:08:08<14:10:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4127/12318 [7:08:16<14:09:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4127/12318 [7:08:16<14:09:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4128/12318 [7:08:47<14:10:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4128/12318 [7:08:47<14:10:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4129/12318 [7:08:55<14:10:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4129/12318 [7:08:55<14:10:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4130/12318 [7:09:00<14:10:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4130/12318 [7:09:00<14:10:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4131/12318 [7:09:07<14:10:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4131/12318 [7:09:07<14:10:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4132/12318 [7:09:12<14:10:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4132/12318 [7:09:12<14:10:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4133/12318 [7:09:19<14:10:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4133/12318 [7:09:19<14:10:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4134/12318 [7:09:27<14:10:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4134/12318 [7:09:27<14:10:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4135/12318 [7:09:30<14:09:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4135/12318 [7:09:30<14:09:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4136/12318 [7:09:34<14:09:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4136/12318 [7:09:34<14:09:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4137/12318 [7:09:42<14:09:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4137/12318 [7:09:42<14:09:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4138/12318 [7:09:51<14:09:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4138/12318 [7:09:51<14:09:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4139/12318 [7:09:57<14:09:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4139/12318 [7:09:57<14:09:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4140/12318 [7:10:05<14:09:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4140/12318 [7:10:05<14:09:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4141/12318 [7:10:14<14:09:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4141/12318 [7:10:14<14:09:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4142/12318 [7:10:15<14:09:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4142/12318 [7:10:15<14:09:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4143/12318 [7:10:19<14:09:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4143/12318 [7:10:19<14:09:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4144/12318 [7:10:25<14:09:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4144/12318 [7:10:25<14:09:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4145/12318 [7:10:29<14:08:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4145/12318 [7:10:29<14:08:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4146/12318 [7:10:34<14:08:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4146/12318 [7:10:34<14:08:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4147/12318 [7:10:41<14:08:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4147/12318 [7:10:41<14:08:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4148/12318 [7:10:46<14:08:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4148/12318 [7:10:46<14:08:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4149/12318 [7:10:55<14:08:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4149/12318 [7:10:55<14:08:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4150/12318 [7:11:01<14:08:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4150/12318 [7:11:01<14:08:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4151/12318 [7:11:05<14:08:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4151/12318 [7:11:05<14:08:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4152/12318 [7:11:12<14:08:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4152/12318 [7:11:12<14:08:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4153/12318 [7:11:17<14:07:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4153/12318 [7:11:17<14:07:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4154/12318 [7:11:19<14:07:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4154/12318 [7:11:19<14:07:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4155/12318 [7:11:28<14:07:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4155/12318 [7:11:28<14:07:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4156/12318 [7:11:32<14:07:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4156/12318 [7:11:32<14:07:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4157/12318 [7:11:35<14:07:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4157/12318 [7:11:35<14:07:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4158/12318 [7:11:39<14:07:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4158/12318 [7:11:39<14:07:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4159/12318 [7:11:47<14:07:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4159/12318 [7:11:47<14:07:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4160/12318 [7:12:05<14:07:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4160/12318 [7:12:05<14:07:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4161/12318 [7:12:09<14:07:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4161/12318 [7:12:09<14:07:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4162/12318 [7:12:15<14:07:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4162/12318 [7:12:15<14:07:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4163/12318 [7:12:23<14:07:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4163/12318 [7:12:23<14:07:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4164/12318 [7:12:25<14:06:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4164/12318 [7:12:25<14:06:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4165/12318 [7:12:30<14:06:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4165/12318 [7:12:30<14:06:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4166/12318 [7:12:37<14:06:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4166/12318 [7:12:37<14:06:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4167/12318 [7:12:38<14:06:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4167/12318 [7:12:38<14:06:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4168/12318 [7:12:43<14:06:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4168/12318 [7:12:43<14:06:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4169/12318 [7:12:47<14:05:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4169/12318 [7:12:47<14:05:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4170/12318 [7:12:55<14:05:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4170/12318 [7:12:55<14:05:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4171/12318 [7:13:01<14:05:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4171/12318 [7:13:01<14:05:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4172/12318 [7:13:09<14:05:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4172/12318 [7:13:09<14:05:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4173/12318 [7:13:14<14:05:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4173/12318 [7:13:14<14:05:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4174/12318 [7:13:21<14:05:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4174/12318 [7:13:21<14:05:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4175/12318 [7:13:28<14:05:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4175/12318 [7:13:28<14:05:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4176/12318 [7:13:29<14:05:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4176/12318 [7:13:29<14:05:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4177/12318 [7:13:33<14:05:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4177/12318 [7:13:33<14:05:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4178/12318 [7:13:37<14:04:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4178/12318 [7:13:37<14:04:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4179/12318 [7:13:44<14:04:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4179/12318 [7:13:44<14:04:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4180/12318 [7:13:53<14:04:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4180/12318 [7:13:53<14:04:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4181/12318 [7:13:58<14:04:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4181/12318 [7:13:58<14:04:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4182/12318 [7:13:59<14:04:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4182/12318 [7:13:59<14:04:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4183/12318 [7:14:05<14:04:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4183/12318 [7:14:05<14:04:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4184/12318 [7:14:08<14:03:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4184/12318 [7:14:08<14:03:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4185/12318 [7:14:10<14:03:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4185/12318 [7:14:10<14:03:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4186/12318 [7:14:12<14:03:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4186/12318 [7:14:12<14:03:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4187/12318 [7:14:17<14:03:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4187/12318 [7:14:17<14:03:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4188/12318 [7:14:20<14:03:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4188/12318 [7:14:20<14:03:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4189/12318 [7:14:24<14:02:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4189/12318 [7:14:24<14:02:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4190/12318 [7:14:28<14:02:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4190/12318 [7:14:28<14:02:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4191/12318 [7:14:34<14:02:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4191/12318 [7:14:34<14:02:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4192/12318 [7:15:28<14:04:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4192/12318 [7:15:28<14:04:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4193/12318 [7:15:32<14:03:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4193/12318 [7:15:32<14:03:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4194/12318 [7:15:38<14:03:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4194/12318 [7:15:38<14:03:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4195/12318 [7:15:42<14:03:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4195/12318 [7:15:42<14:03:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4196/12318 [7:15:48<14:03:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4196/12318 [7:15:48<14:03:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4197/12318 [7:15:49<14:03:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4197/12318 [7:15:49<14:03:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4198/12318 [7:15:53<14:03:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4198/12318 [7:15:53<14:03:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4199/12318 [7:16:01<14:03:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4199/12318 [7:16:01<14:03:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4200/12318 [7:16:06<14:02:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4200/12318 [7:16:06<14:02:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4201/12318 [7:16:09<14:02:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4201/12318 [7:16:09<14:02:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4202/12318 [7:16:16<14:02:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4202/12318 [7:16:16<14:02:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4203/12318 [7:16:18<14:02:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4203/12318 [7:16:18<14:02:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4204/12318 [7:16:26<14:02:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4204/12318 [7:16:26<14:02:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4205/12318 [7:16:28<14:02:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4205/12318 [7:16:28<14:02:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4206/12318 [7:16:37<14:02:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4206/12318 [7:16:37<14:02:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4207/12318 [7:16:39<14:01:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4207/12318 [7:16:39<14:01:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4208/12318 [7:16:44<14:01:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4208/12318 [7:16:44<14:01:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4209/12318 [7:16:51<14:01:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4209/12318 [7:16:51<14:01:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4210/12318 [7:16:56<14:01:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4210/12318 [7:16:56<14:01:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4211/12318 [7:17:03<14:01:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4211/12318 [7:17:03<14:01:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4212/12318 [7:17:10<14:01:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4212/12318 [7:17:10<14:01:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4213/12318 [7:17:13<14:01:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4213/12318 [7:17:13<14:01:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4214/12318 [7:17:21<14:01:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4214/12318 [7:17:21<14:01:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4215/12318 [7:17:30<14:01:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4215/12318 [7:17:30<14:01:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4216/12318 [7:17:32<14:00:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4216/12318 [7:17:32<14:00:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4217/12318 [7:17:34<14:00:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4217/12318 [7:17:34<14:00:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4218/12318 [7:17:38<14:00:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4218/12318 [7:17:38<14:00:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4219/12318 [7:17:46<14:00:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4219/12318 [7:17:46<14:00:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4220/12318 [7:17:51<14:00:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4220/12318 [7:17:51<14:00:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4221/12318 [7:17:59<14:00:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4221/12318 [7:17:59<14:00:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4222/12318 [7:18:03<13:59:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4222/12318 [7:18:03<13:59:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4223/12318 [7:18:11<13:59:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4223/12318 [7:18:11<13:59:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4224/12318 [7:18:42<14:00:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4224/12318 [7:18:42<14:00:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4225/12318 [7:18:46<14:00:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4225/12318 [7:18:46<14:00:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4226/12318 [7:18:52<14:00:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4226/12318 [7:18:52<14:00:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4227/12318 [7:19:01<14:00:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4227/12318 [7:19:01<14:00:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4228/12318 [7:19:08<14:00:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4228/12318 [7:19:08<14:00:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4229/12318 [7:19:14<14:00:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4229/12318 [7:19:14<14:00:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4230/12318 [7:19:19<14:00:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4230/12318 [7:19:19<14:00:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4231/12318 [7:19:28<13:59:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4231/12318 [7:19:28<13:59:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4232/12318 [7:19:31<13:59:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4232/12318 [7:19:31<13:59:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4233/12318 [7:19:39<13:59:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4233/12318 [7:19:39<13:59:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4234/12318 [7:19:46<13:59:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4234/12318 [7:19:46<13:59:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4235/12318 [7:19:55<13:59:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4235/12318 [7:19:55<13:59:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4236/12318 [7:20:03<13:59:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4236/12318 [7:20:03<13:59:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4237/12318 [7:20:06<13:59:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4237/12318 [7:20:06<13:59:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4238/12318 [7:20:11<13:59:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4238/12318 [7:20:11<13:59:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4239/12318 [7:20:19<13:59:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4239/12318 [7:20:19<13:59:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4240/12318 [7:20:25<13:59:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4240/12318 [7:20:25<13:59:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4241/12318 [7:20:30<13:58:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4241/12318 [7:20:30<13:58:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4242/12318 [7:20:34<13:58:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4242/12318 [7:20:34<13:58:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4243/12318 [7:20:37<13:58:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4243/12318 [7:20:37<13:58:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4244/12318 [7:20:42<13:58:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4244/12318 [7:20:42<13:58:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4245/12318 [7:20:47<13:58:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4245/12318 [7:20:47<13:58:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4246/12318 [7:20:53<13:58:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4246/12318 [7:20:53<13:58:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4247/12318 [7:20:55<13:57:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4247/12318 [7:20:55<13:57:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4248/12318 [7:20:57<13:57:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4248/12318 [7:20:57<13:57:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  34%|▎| 4249/12318 [7:21:03<13:57:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  34%|▎| 4249/12318 [7:21:03<13:57:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4250/12318 [7:21:09<13:57:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4250/12318 [7:21:09<13:57:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4251/12318 [7:21:15<13:57:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4251/12318 [7:21:15<13:57:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4252/12318 [7:21:23<13:57:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4252/12318 [7:21:23<13:57:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4253/12318 [7:21:29<13:57:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4253/12318 [7:21:29<13:57:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4254/12318 [7:21:35<13:57:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4254/12318 [7:21:35<13:57:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4255/12318 [7:21:43<13:57:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4255/12318 [7:21:43<13:57:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4256/12318 [7:22:03<13:57:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4256/12318 [7:22:03<13:57:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4257/12318 [7:22:08<13:57:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4257/12318 [7:22:08<13:57:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4258/12318 [7:22:12<13:57:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4258/12318 [7:22:12<13:57:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4259/12318 [7:22:19<13:56:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4259/12318 [7:22:19<13:56:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4260/12318 [7:22:26<13:56:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4260/12318 [7:22:26<13:56:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4261/12318 [7:22:29<13:56:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4261/12318 [7:22:29<13:56:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4262/12318 [7:22:35<13:56:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4262/12318 [7:22:35<13:56:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4263/12318 [7:22:36<13:56:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4263/12318 [7:22:36<13:56:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4264/12318 [7:22:38<13:56:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4264/12318 [7:22:38<13:56:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4265/12318 [7:22:45<13:55:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4265/12318 [7:22:45<13:55:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4266/12318 [7:22:51<13:55:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4266/12318 [7:22:51<13:55:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4267/12318 [7:22:56<13:55:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4267/12318 [7:22:56<13:55:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4268/12318 [7:23:00<13:55:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4268/12318 [7:23:00<13:55:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4269/12318 [7:23:05<13:55:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4269/12318 [7:23:05<13:55:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4270/12318 [7:23:10<13:55:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4270/12318 [7:23:10<13:55:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4271/12318 [7:23:13<13:55:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4271/12318 [7:23:13<13:55:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4272/12318 [7:23:19<13:54:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4272/12318 [7:23:19<13:54:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4273/12318 [7:23:27<13:54:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4273/12318 [7:23:27<13:54:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4274/12318 [7:23:36<13:54:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4274/12318 [7:23:36<13:54:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4275/12318 [7:23:44<13:54:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4275/12318 [7:23:44<13:54:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4276/12318 [7:23:50<13:54:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4276/12318 [7:23:50<13:54:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4277/12318 [7:23:57<13:54:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4277/12318 [7:23:57<13:54:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4278/12318 [7:23:58<13:54:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4278/12318 [7:23:58<13:54:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4279/12318 [7:24:02<13:54:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4279/12318 [7:24:02<13:54:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4280/12318 [7:24:03<13:53:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4280/12318 [7:24:03<13:53:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4281/12318 [7:24:08<13:53:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4281/12318 [7:24:08<13:53:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4282/12318 [7:24:09<13:53:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4282/12318 [7:24:09<13:53:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4283/12318 [7:24:14<13:53:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4283/12318 [7:24:14<13:53:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4284/12318 [7:24:16<13:53:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4284/12318 [7:24:16<13:53:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4285/12318 [7:24:22<13:53:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4285/12318 [7:24:22<13:53:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4286/12318 [7:24:23<13:52:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4286/12318 [7:24:23<13:52:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4287/12318 [7:24:25<13:52:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4287/12318 [7:24:25<13:52:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4288/12318 [7:25:20<13:53:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4288/12318 [7:25:20<13:53:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4289/12318 [7:25:28<13:53:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4289/12318 [7:25:28<13:53:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4290/12318 [7:25:33<13:53:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4290/12318 [7:25:33<13:53:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4291/12318 [7:25:38<13:53:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4291/12318 [7:25:38<13:53:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4292/12318 [7:25:40<13:53:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4292/12318 [7:25:40<13:53:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4293/12318 [7:25:46<13:53:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4293/12318 [7:25:46<13:53:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4294/12318 [7:25:54<13:53:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4294/12318 [7:25:54<13:53:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4295/12318 [7:26:03<13:53:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4295/12318 [7:26:03<13:53:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4296/12318 [7:26:09<13:53:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4296/12318 [7:26:09<13:53:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4297/12318 [7:26:13<13:52:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4297/12318 [7:26:13<13:52:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4298/12318 [7:26:20<13:52:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4298/12318 [7:26:20<13:52:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4299/12318 [7:26:23<13:52:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4299/12318 [7:26:23<13:52:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4300/12318 [7:26:27<13:52:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4300/12318 [7:26:27<13:52:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4301/12318 [7:26:33<13:52:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4301/12318 [7:26:33<13:52:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4302/12318 [7:26:41<13:52:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4302/12318 [7:26:41<13:52:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4303/12318 [7:26:44<13:52:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4303/12318 [7:26:44<13:52:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4304/12318 [7:26:49<13:51:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4304/12318 [7:26:49<13:51:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4305/12318 [7:26:56<13:51:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4305/12318 [7:26:56<13:51:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4306/12318 [7:26:59<13:51:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4306/12318 [7:26:59<13:51:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4307/12318 [7:27:03<13:51:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4307/12318 [7:27:03<13:51:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4308/12318 [7:27:05<13:51:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4308/12318 [7:27:05<13:51:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4309/12318 [7:27:09<13:51:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4309/12318 [7:27:09<13:51:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4310/12318 [7:27:16<13:51:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4310/12318 [7:27:16<13:51:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4311/12318 [7:27:23<13:50:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4311/12318 [7:27:23<13:50:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4312/12318 [7:27:32<13:50:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4312/12318 [7:27:32<13:50:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4313/12318 [7:27:38<13:50:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4313/12318 [7:27:38<13:50:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4314/12318 [7:27:40<13:50:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4314/12318 [7:27:40<13:50:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4315/12318 [7:27:44<13:50:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4315/12318 [7:27:44<13:50:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4316/12318 [7:27:47<13:50:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4316/12318 [7:27:47<13:50:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4317/12318 [7:27:51<13:50:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4317/12318 [7:27:51<13:50:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4318/12318 [7:27:53<13:49:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4318/12318 [7:27:53<13:49:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4319/12318 [7:27:58<13:49:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4319/12318 [7:27:58<13:49:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4320/12318 [7:28:39<13:50:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4320/12318 [7:28:39<13:50:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4321/12318 [7:28:41<13:50:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4321/12318 [7:28:41<13:50:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4322/12318 [7:28:44<13:50:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4322/12318 [7:28:44<13:50:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4323/12318 [7:28:50<13:50:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4323/12318 [7:28:50<13:50:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4324/12318 [7:28:55<13:49:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4324/12318 [7:28:55<13:49:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4325/12318 [7:29:00<13:49:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4325/12318 [7:29:00<13:49:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4326/12318 [7:29:08<13:49:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4326/12318 [7:29:08<13:49:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4327/12318 [7:29:14<13:49:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4327/12318 [7:29:14<13:49:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4328/12318 [7:29:20<13:49:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4328/12318 [7:29:20<13:49:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4329/12318 [7:29:25<13:49:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4329/12318 [7:29:25<13:49:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4330/12318 [7:29:29<13:49:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4330/12318 [7:29:29<13:49:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4331/12318 [7:29:37<13:49:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4331/12318 [7:29:37<13:49:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4332/12318 [7:29:46<13:49:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4332/12318 [7:29:46<13:49:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4333/12318 [7:29:54<13:49:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4333/12318 [7:29:54<13:49:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4334/12318 [7:29:59<13:48:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4334/12318 [7:29:59<13:48:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4335/12318 [7:30:03<13:48:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4335/12318 [7:30:03<13:48:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4336/12318 [7:30:07<13:48:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4336/12318 [7:30:07<13:48:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4337/12318 [7:30:14<13:48:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4337/12318 [7:30:14<13:48:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4338/12318 [7:30:18<13:48:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4338/12318 [7:30:18<13:48:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4339/12318 [7:30:23<13:48:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4339/12318 [7:30:23<13:48:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4340/12318 [7:30:30<13:48:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4340/12318 [7:30:30<13:48:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4341/12318 [7:30:32<13:47:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4341/12318 [7:30:32<13:47:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4342/12318 [7:30:36<13:47:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4342/12318 [7:30:36<13:47:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4343/12318 [7:30:44<13:47:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4343/12318 [7:30:44<13:47:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4344/12318 [7:30:53<13:47:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4344/12318 [7:30:53<13:47:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4345/12318 [7:30:58<13:47:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4345/12318 [7:30:58<13:47:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4346/12318 [7:31:04<13:47:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4346/12318 [7:31:04<13:47:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4347/12318 [7:31:10<13:47:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4347/12318 [7:31:10<13:47:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4348/12318 [7:31:18<13:47:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4348/12318 [7:31:18<13:47:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4349/12318 [7:31:22<13:47:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4349/12318 [7:31:22<13:47:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4350/12318 [7:31:27<13:46:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4350/12318 [7:31:27<13:46:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4351/12318 [7:31:30<13:46:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4351/12318 [7:31:30<13:46:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4352/12318 [7:31:52<13:47:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4352/12318 [7:31:52<13:47:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4353/12318 [7:32:00<13:47:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4353/12318 [7:32:00<13:47:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4354/12318 [7:32:05<13:46:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4354/12318 [7:32:05<13:46:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4355/12318 [7:32:10<13:46:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4355/12318 [7:32:10<13:46:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4356/12318 [7:32:15<13:46:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4356/12318 [7:32:15<13:46:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4357/12318 [7:32:21<13:46:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4357/12318 [7:32:21<13:46:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4358/12318 [7:32:27<13:46:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4358/12318 [7:32:27<13:46:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4359/12318 [7:32:31<13:46:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4359/12318 [7:32:31<13:46:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4360/12318 [7:32:34<13:46:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4360/12318 [7:32:34<13:46:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4361/12318 [7:32:43<13:46:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4361/12318 [7:32:43<13:46:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4362/12318 [7:32:51<13:45:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4362/12318 [7:32:51<13:45:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4363/12318 [7:32:58<13:45:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4363/12318 [7:32:58<13:45:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4364/12318 [7:33:04<13:45:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4364/12318 [7:33:04<13:45:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4365/12318 [7:33:12<13:45:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4365/12318 [7:33:12<13:45:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4366/12318 [7:33:16<13:45:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4366/12318 [7:33:16<13:45:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4367/12318 [7:33:18<13:45:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4367/12318 [7:33:18<13:45:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4368/12318 [7:33:21<13:45:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4368/12318 [7:33:21<13:45:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4369/12318 [7:33:29<13:45:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4369/12318 [7:33:29<13:45:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4370/12318 [7:33:32<13:44:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4370/12318 [7:33:32<13:44:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4371/12318 [7:33:37<13:44:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4371/12318 [7:33:37<13:44:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  35%|▎| 4372/12318 [7:33:39<13:44:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  35%|▎| 4372/12318 [7:33:39<13:44:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4373/12318 [7:33:47<13:44:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4373/12318 [7:33:47<13:44:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4374/12318 [7:33:51<13:44:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4374/12318 [7:33:51<13:44:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4375/12318 [7:33:58<13:44:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4375/12318 [7:33:58<13:44:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4376/12318 [7:34:00<13:43:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4376/12318 [7:34:00<13:43:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4377/12318 [7:34:03<13:43:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4377/12318 [7:34:03<13:43:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4378/12318 [7:34:09<13:43:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4378/12318 [7:34:09<13:43:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4379/12318 [7:34:14<13:43:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4379/12318 [7:34:14<13:43:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4380/12318 [7:34:16<13:43:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4380/12318 [7:34:16<13:43:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4381/12318 [7:34:21<13:43:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4381/12318 [7:34:21<13:43:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4382/12318 [7:34:25<13:42:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4382/12318 [7:34:25<13:42:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4383/12318 [7:34:26<13:42:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4383/12318 [7:34:26<13:42:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4384/12318 [7:35:05<13:43:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4384/12318 [7:35:05<13:43:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4385/12318 [7:35:14<13:43:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4385/12318 [7:35:14<13:43:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4386/12318 [7:35:21<13:43:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4386/12318 [7:35:21<13:43:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4387/12318 [7:35:28<13:43:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4387/12318 [7:35:28<13:43:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4388/12318 [7:35:36<13:43:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4388/12318 [7:35:36<13:43:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4389/12318 [7:35:41<13:43:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4389/12318 [7:35:41<13:43:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4390/12318 [7:35:44<13:43:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4390/12318 [7:35:44<13:43:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4391/12318 [7:35:52<13:42:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4391/12318 [7:35:52<13:42:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4392/12318 [7:35:55<13:42:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4392/12318 [7:35:55<13:42:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4393/12318 [7:36:01<13:42:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4393/12318 [7:36:01<13:42:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4394/12318 [7:36:08<13:42:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4394/12318 [7:36:08<13:42:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4395/12318 [7:36:12<13:42:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4395/12318 [7:36:12<13:42:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4396/12318 [7:36:14<13:42:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4396/12318 [7:36:14<13:42:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4397/12318 [7:36:20<13:42:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4397/12318 [7:36:20<13:42:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4398/12318 [7:36:22<13:41:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4398/12318 [7:36:22<13:41:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4399/12318 [7:36:28<13:41:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4399/12318 [7:36:28<13:41:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4400/12318 [7:36:36<13:41:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4400/12318 [7:36:36<13:41:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4401/12318 [7:36:39<13:41:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4401/12318 [7:36:39<13:41:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4402/12318 [7:36:45<13:41:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4402/12318 [7:36:45<13:41:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4403/12318 [7:36:46<13:41:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4403/12318 [7:36:46<13:41:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4404/12318 [7:36:50<13:40:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4404/12318 [7:36:50<13:40:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4405/12318 [7:36:53<13:40:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4405/12318 [7:36:53<13:40:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4406/12318 [7:36:54<13:40:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4406/12318 [7:36:54<13:40:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4407/12318 [7:36:58<13:40:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4407/12318 [7:36:58<13:40:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4408/12318 [7:37:05<13:40:13,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4408/12318 [7:37:05<13:40:13,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4409/12318 [7:37:14<13:40:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4409/12318 [7:37:14<13:40:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4410/12318 [7:37:22<13:40:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4410/12318 [7:37:22<13:40:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4411/12318 [7:37:25<13:39:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4411/12318 [7:37:25<13:39:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4412/12318 [7:37:34<13:39:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4412/12318 [7:37:34<13:39:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4413/12318 [7:37:36<13:39:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4413/12318 [7:37:36<13:39:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4414/12318 [7:37:43<13:39:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4414/12318 [7:37:43<13:39:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4415/12318 [7:37:49<13:39:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4415/12318 [7:37:49<13:39:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4416/12318 [7:38:28<13:40:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4416/12318 [7:38:28<13:40:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4417/12318 [7:38:29<13:40:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4417/12318 [7:38:29<13:40:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4418/12318 [7:38:34<13:39:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4418/12318 [7:38:34<13:39:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4419/12318 [7:38:42<13:39:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4419/12318 [7:38:42<13:39:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4420/12318 [7:38:49<13:39:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4420/12318 [7:38:49<13:39:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4421/12318 [7:38:55<13:39:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4421/12318 [7:38:55<13:39:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4422/12318 [7:39:00<13:39:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4422/12318 [7:39:00<13:39:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4423/12318 [7:39:07<13:39:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4423/12318 [7:39:07<13:39:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4424/12318 [7:39:15<13:39:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4424/12318 [7:39:15<13:39:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4425/12318 [7:39:21<13:39:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4425/12318 [7:39:21<13:39:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4426/12318 [7:39:26<13:39:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4426/12318 [7:39:26<13:39:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4427/12318 [7:39:31<13:39:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4427/12318 [7:39:31<13:39:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4428/12318 [7:39:36<13:38:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4428/12318 [7:39:36<13:38:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4429/12318 [7:39:40<13:38:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4429/12318 [7:39:40<13:38:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4430/12318 [7:39:44<13:38:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4430/12318 [7:39:44<13:38:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4431/12318 [7:39:46<13:38:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4431/12318 [7:39:46<13:38:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4432/12318 [7:39:54<13:38:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4432/12318 [7:39:54<13:38:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4433/12318 [7:39:59<13:38:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4433/12318 [7:39:59<13:38:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4434/12318 [7:40:07<13:38:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4434/12318 [7:40:07<13:38:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4435/12318 [7:40:14<13:38:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4435/12318 [7:40:14<13:38:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4436/12318 [7:40:19<13:37:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4436/12318 [7:40:19<13:37:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4437/12318 [7:40:26<13:37:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4437/12318 [7:40:26<13:37:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4438/12318 [7:40:30<13:37:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4438/12318 [7:40:30<13:37:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4439/12318 [7:40:34<13:37:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4439/12318 [7:40:34<13:37:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4440/12318 [7:40:40<13:37:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4440/12318 [7:40:40<13:37:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4441/12318 [7:40:44<13:37:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4441/12318 [7:40:44<13:37:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4442/12318 [7:40:49<13:37:05,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4442/12318 [7:40:49<13:37:05,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4443/12318 [7:40:57<13:37:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4443/12318 [7:40:57<13:37:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4444/12318 [7:40:59<13:36:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4444/12318 [7:40:59<13:36:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4445/12318 [7:41:04<13:36:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4445/12318 [7:41:04<13:36:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4446/12318 [7:41:09<13:36:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4446/12318 [7:41:09<13:36:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4447/12318 [7:41:15<13:36:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4447/12318 [7:41:15<13:36:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4448/12318 [7:41:54<13:37:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4448/12318 [7:41:54<13:37:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4449/12318 [7:41:58<13:37:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4449/12318 [7:41:58<13:37:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4450/12318 [7:42:02<13:36:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4450/12318 [7:42:02<13:36:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4451/12318 [7:42:05<13:36:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4451/12318 [7:42:05<13:36:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4452/12318 [7:42:09<13:36:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4452/12318 [7:42:09<13:36:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4453/12318 [7:42:14<13:36:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4453/12318 [7:42:14<13:36:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4454/12318 [7:42:18<13:36:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4454/12318 [7:42:18<13:36:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4455/12318 [7:42:23<13:36:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4455/12318 [7:42:23<13:36:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4456/12318 [7:42:29<13:36:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4456/12318 [7:42:29<13:36:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4457/12318 [7:42:34<13:35:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4457/12318 [7:42:34<13:35:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4458/12318 [7:42:35<13:35:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4458/12318 [7:42:35<13:35:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4459/12318 [7:42:37<13:35:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4459/12318 [7:42:37<13:35:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4460/12318 [7:42:46<13:35:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4460/12318 [7:42:46<13:35:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4461/12318 [7:42:53<13:35:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4461/12318 [7:42:53<13:35:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4462/12318 [7:42:55<13:35:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4462/12318 [7:42:55<13:35:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4463/12318 [7:42:59<13:34:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4463/12318 [7:42:59<13:34:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4464/12318 [7:43:05<13:34:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4464/12318 [7:43:05<13:34:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4465/12318 [7:43:07<13:34:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4465/12318 [7:43:07<13:34:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4466/12318 [7:43:14<13:34:28,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4466/12318 [7:43:14<13:34:28,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4467/12318 [7:43:17<13:34:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4467/12318 [7:43:17<13:34:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4468/12318 [7:43:26<13:34:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4468/12318 [7:43:26<13:34:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4469/12318 [7:43:34<13:34:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4469/12318 [7:43:34<13:34:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4470/12318 [7:43:38<13:34:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4470/12318 [7:43:38<13:34:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4471/12318 [7:43:42<13:33:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4471/12318 [7:43:42<13:33:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4472/12318 [7:43:47<13:33:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4472/12318 [7:43:47<13:33:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4473/12318 [7:43:51<13:33:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4473/12318 [7:43:51<13:33:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4474/12318 [7:43:53<13:33:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4474/12318 [7:43:53<13:33:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4475/12318 [7:43:55<13:33:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4475/12318 [7:43:55<13:33:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4476/12318 [7:43:57<13:32:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4476/12318 [7:43:57<13:32:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4477/12318 [7:44:04<13:32:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4477/12318 [7:44:04<13:32:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4478/12318 [7:44:10<13:32:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4478/12318 [7:44:10<13:32:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4479/12318 [7:44:17<13:32:34,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4479/12318 [7:44:17<13:32:34,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4480/12318 [7:45:04<13:33:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4480/12318 [7:45:04<13:33:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4481/12318 [7:45:13<13:33:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4481/12318 [7:45:13<13:33:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4482/12318 [7:45:21<13:33:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4482/12318 [7:45:21<13:33:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4483/12318 [7:45:30<13:33:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4483/12318 [7:45:30<13:33:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4484/12318 [7:45:34<13:33:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4484/12318 [7:45:34<13:33:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4485/12318 [7:45:41<13:33:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4485/12318 [7:45:41<13:33:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4486/12318 [7:45:47<13:33:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4486/12318 [7:45:47<13:33:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4487/12318 [7:45:53<13:33:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4487/12318 [7:45:53<13:33:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4488/12318 [7:45:54<13:32:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4488/12318 [7:45:54<13:32:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4489/12318 [7:46:02<13:32:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4489/12318 [7:46:02<13:32:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4490/12318 [7:46:07<13:32:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4490/12318 [7:46:07<13:32:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4491/12318 [7:46:16<13:32:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4491/12318 [7:46:16<13:32:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4492/12318 [7:46:21<13:32:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4492/12318 [7:46:21<13:32:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4493/12318 [7:46:24<13:32:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4493/12318 [7:46:24<13:32:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4494/12318 [7:46:32<13:32:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4494/12318 [7:46:32<13:32:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4495/12318 [7:46:39<13:32:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4495/12318 [7:46:39<13:32:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  36%|▎| 4496/12318 [7:46:42<13:31:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  36%|▎| 4496/12318 [7:46:42<13:31:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4497/12318 [7:46:48<13:31:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4497/12318 [7:46:48<13:31:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4498/12318 [7:46:54<13:31:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4498/12318 [7:46:54<13:31:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4499/12318 [7:46:56<13:31:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4499/12318 [7:46:56<13:31:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4500/12318 [7:46:58<13:31:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4500/12318 [7:46:58<13:31:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4501/12318 [7:47:03<13:31:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4501/12318 [7:47:03<13:31:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4502/12318 [7:47:10<13:31:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4502/12318 [7:47:10<13:31:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4503/12318 [7:47:12<13:30:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4503/12318 [7:47:12<13:30:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4504/12318 [7:47:21<13:30:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4504/12318 [7:47:21<13:30:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4505/12318 [7:47:23<13:30:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4505/12318 [7:47:23<13:30:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4506/12318 [7:47:26<13:30:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4506/12318 [7:47:26<13:30:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4507/12318 [7:47:29<13:30:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4507/12318 [7:47:29<13:30:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4508/12318 [7:47:34<13:30:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4508/12318 [7:47:34<13:30:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4509/12318 [7:47:38<13:29:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4509/12318 [7:47:38<13:29:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4510/12318 [7:47:45<13:29:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4510/12318 [7:47:45<13:29:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4511/12318 [7:47:49<13:29:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4511/12318 [7:47:49<13:29:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4512/12318 [7:48:22<13:30:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4512/12318 [7:48:22<13:30:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4513/12318 [7:48:26<13:30:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4513/12318 [7:48:26<13:30:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4514/12318 [7:48:29<13:29:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4514/12318 [7:48:29<13:29:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4515/12318 [7:48:37<13:29:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4515/12318 [7:48:37<13:29:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4516/12318 [7:48:41<13:29:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4516/12318 [7:48:41<13:29:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4517/12318 [7:48:46<13:29:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4517/12318 [7:48:46<13:29:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4518/12318 [7:48:51<13:29:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4518/12318 [7:48:51<13:29:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4519/12318 [7:48:57<13:29:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4519/12318 [7:48:57<13:29:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4520/12318 [7:49:06<13:29:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4520/12318 [7:49:06<13:29:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4521/12318 [7:49:14<13:29:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4521/12318 [7:49:14<13:29:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4522/12318 [7:49:19<13:29:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4522/12318 [7:49:19<13:29:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4523/12318 [7:49:27<13:29:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4523/12318 [7:49:27<13:29:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4524/12318 [7:49:32<13:28:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4524/12318 [7:49:32<13:28:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4525/12318 [7:49:40<13:28:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4525/12318 [7:49:40<13:28:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4526/12318 [7:49:41<13:28:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4526/12318 [7:49:41<13:28:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4527/12318 [7:49:42<13:28:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4527/12318 [7:49:42<13:28:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4528/12318 [7:49:50<13:28:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4528/12318 [7:49:50<13:28:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4529/12318 [7:49:56<13:28:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4529/12318 [7:49:56<13:28:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4530/12318 [7:50:04<13:28:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4530/12318 [7:50:04<13:28:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4531/12318 [7:50:10<13:28:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4531/12318 [7:50:10<13:28:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4532/12318 [7:50:16<13:27:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4532/12318 [7:50:16<13:27:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4533/12318 [7:50:25<13:27:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4533/12318 [7:50:25<13:27:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4534/12318 [7:50:31<13:27:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4534/12318 [7:50:31<13:27:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4535/12318 [7:50:36<13:27:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4535/12318 [7:50:36<13:27:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4536/12318 [7:50:40<13:27:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4536/12318 [7:50:40<13:27:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4537/12318 [7:50:45<13:27:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4537/12318 [7:50:45<13:27:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4538/12318 [7:50:54<13:27:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4538/12318 [7:50:54<13:27:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4539/12318 [7:51:03<13:27:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4539/12318 [7:51:03<13:27:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4540/12318 [7:51:05<13:27:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4540/12318 [7:51:05<13:27:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4541/12318 [7:51:10<13:26:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4541/12318 [7:51:10<13:26:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4542/12318 [7:51:18<13:26:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4542/12318 [7:51:18<13:26:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4543/12318 [7:51:19<13:26:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4543/12318 [7:51:19<13:26:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4544/12318 [7:51:37<13:26:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4544/12318 [7:51:37<13:26:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4545/12318 [7:51:45<13:26:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4545/12318 [7:51:45<13:26:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4546/12318 [7:51:53<13:26:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4546/12318 [7:51:53<13:26:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4547/12318 [7:51:59<13:26:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4547/12318 [7:51:59<13:26:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4548/12318 [7:52:06<13:26:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4548/12318 [7:52:06<13:26:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4549/12318 [7:52:15<13:26:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4549/12318 [7:52:15<13:26:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4550/12318 [7:52:21<13:26:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4550/12318 [7:52:21<13:26:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4551/12318 [7:52:29<13:26:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4551/12318 [7:52:29<13:26:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4552/12318 [7:52:34<13:26:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4552/12318 [7:52:34<13:26:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4553/12318 [7:52:36<13:26:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4553/12318 [7:52:36<13:26:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4554/12318 [7:52:45<13:25:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4554/12318 [7:52:45<13:25:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4555/12318 [7:52:52<13:25:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4555/12318 [7:52:52<13:25:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4556/12318 [7:52:55<13:25:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4556/12318 [7:52:55<13:25:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4557/12318 [7:53:02<13:25:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4557/12318 [7:53:02<13:25:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4558/12318 [7:53:08<13:25:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4558/12318 [7:53:08<13:25:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4559/12318 [7:53:09<13:25:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4559/12318 [7:53:09<13:25:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4560/12318 [7:53:10<13:25:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4560/12318 [7:53:10<13:25:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4561/12318 [7:53:19<13:25:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4561/12318 [7:53:19<13:25:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4562/12318 [7:53:26<13:24:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4562/12318 [7:53:26<13:24:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4563/12318 [7:53:35<13:24:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4563/12318 [7:53:35<13:24:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4564/12318 [7:53:40<13:24:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4564/12318 [7:53:40<13:24:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4565/12318 [7:53:48<13:24:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4565/12318 [7:53:48<13:24:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4566/12318 [7:53:57<13:24:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4566/12318 [7:53:57<13:24:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4567/12318 [7:54:00<13:24:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4567/12318 [7:54:00<13:24:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4568/12318 [7:54:08<13:24:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4568/12318 [7:54:08<13:24:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4569/12318 [7:54:13<13:24:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4569/12318 [7:54:13<13:24:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4570/12318 [7:54:17<13:24:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4570/12318 [7:54:17<13:24:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4571/12318 [7:54:22<13:23:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4571/12318 [7:54:22<13:23:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4572/12318 [7:54:27<13:23:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4572/12318 [7:54:27<13:23:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4573/12318 [7:54:34<13:23:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4573/12318 [7:54:34<13:23:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4574/12318 [7:54:36<13:23:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4574/12318 [7:54:36<13:23:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4575/12318 [7:54:41<13:23:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4575/12318 [7:54:41<13:23:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4576/12318 [7:54:58<13:23:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4576/12318 [7:54:58<13:23:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4577/12318 [7:55:02<13:23:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4577/12318 [7:55:02<13:23:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4578/12318 [7:55:04<13:23:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4578/12318 [7:55:04<13:23:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4579/12318 [7:55:11<13:23:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4579/12318 [7:55:11<13:23:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4580/12318 [7:55:19<13:23:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4580/12318 [7:55:19<13:23:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4581/12318 [7:55:24<13:22:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4581/12318 [7:55:24<13:22:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4582/12318 [7:55:27<13:22:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4582/12318 [7:55:27<13:22:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4583/12318 [7:55:31<13:22:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4583/12318 [7:55:31<13:22:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4584/12318 [7:55:33<13:22:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4584/12318 [7:55:33<13:22:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4585/12318 [7:55:41<13:22:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4585/12318 [7:55:41<13:22:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4586/12318 [7:55:47<13:22:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4586/12318 [7:55:47<13:22:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4587/12318 [7:55:53<13:22:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4587/12318 [7:55:53<13:22:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4588/12318 [7:55:57<13:21:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4588/12318 [7:55:57<13:21:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4589/12318 [7:56:01<13:21:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4589/12318 [7:56:01<13:21:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4590/12318 [7:56:04<13:21:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4590/12318 [7:56:04<13:21:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4591/12318 [7:56:13<13:21:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4591/12318 [7:56:13<13:21:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4592/12318 [7:56:18<13:21:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4592/12318 [7:56:18<13:21:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4593/12318 [7:56:26<13:21:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4593/12318 [7:56:26<13:21:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4594/12318 [7:56:33<13:21:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4594/12318 [7:56:33<13:21:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4595/12318 [7:56:41<13:21:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4595/12318 [7:56:41<13:21:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4596/12318 [7:56:48<13:21:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4596/12318 [7:56:48<13:21:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4597/12318 [7:56:55<13:21:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4597/12318 [7:56:55<13:21:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4598/12318 [7:56:58<13:20:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4598/12318 [7:56:58<13:20:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4599/12318 [7:57:06<13:20:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4599/12318 [7:57:06<13:20:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4600/12318 [7:57:11<13:20:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4600/12318 [7:57:11<13:20:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4601/12318 [7:57:13<13:20:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4601/12318 [7:57:13<13:20:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4602/12318 [7:57:17<13:20:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4602/12318 [7:57:17<13:20:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4603/12318 [7:57:24<13:20:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4603/12318 [7:57:24<13:20:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4604/12318 [7:57:32<13:20:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4604/12318 [7:57:32<13:20:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4605/12318 [7:57:39<13:20:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4605/12318 [7:57:39<13:20:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4606/12318 [7:57:44<13:19:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4606/12318 [7:57:44<13:19:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4607/12318 [7:57:51<13:19:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4607/12318 [7:57:51<13:19:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4608/12318 [7:58:24<13:20:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4608/12318 [7:58:24<13:20:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4609/12318 [7:58:32<13:20:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4609/12318 [7:58:32<13:20:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4610/12318 [7:58:33<13:20:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4610/12318 [7:58:33<13:20:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4611/12318 [7:58:39<13:20:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4611/12318 [7:58:39<13:20:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4612/12318 [7:58:47<13:20:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4612/12318 [7:58:47<13:20:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4613/12318 [7:58:53<13:19:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4613/12318 [7:58:53<13:19:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4614/12318 [7:58:57<13:19:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4614/12318 [7:58:57<13:19:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4615/12318 [7:59:05<13:19:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4615/12318 [7:59:05<13:19:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4616/12318 [7:59:09<13:19:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4616/12318 [7:59:09<13:19:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4617/12318 [7:59:14<13:19:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4617/12318 [7:59:14<13:19:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4618/12318 [7:59:18<13:19:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  37%|▎| 4618/12318 [7:59:18<13:19:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4619/12318 [7:59:24<13:19:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  37%|▎| 4619/12318 [7:59:24<13:19:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4620/12318 [7:59:28<13:18:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4620/12318 [7:59:28<13:18:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4621/12318 [7:59:31<13:18:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4621/12318 [7:59:31<13:18:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4622/12318 [7:59:40<13:18:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4622/12318 [7:59:40<13:18:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4623/12318 [7:59:48<13:18:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4623/12318 [7:59:48<13:18:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4624/12318 [7:59:56<13:18:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4624/12318 [7:59:56<13:18:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4625/12318 [7:59:59<13:18:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4625/12318 [7:59:59<13:18:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4626/12318 [8:00:04<13:18:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4626/12318 [8:00:04<13:18:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4627/12318 [8:00:10<13:18:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4627/12318 [8:00:10<13:18:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4628/12318 [8:00:19<13:18:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4628/12318 [8:00:19<13:18:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4629/12318 [8:00:26<13:18:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4629/12318 [8:00:26<13:18:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4630/12318 [8:00:34<13:17:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4630/12318 [8:00:34<13:17:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4631/12318 [8:00:41<13:17:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4631/12318 [8:00:41<13:17:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4632/12318 [8:00:49<13:17:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4632/12318 [8:00:49<13:17:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4633/12318 [8:00:54<13:17:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4633/12318 [8:00:54<13:17:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4634/12318 [8:01:00<13:17:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4634/12318 [8:01:00<13:17:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4635/12318 [8:01:03<13:17:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4635/12318 [8:01:03<13:17:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4636/12318 [8:01:12<13:17:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4636/12318 [8:01:12<13:17:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4637/12318 [8:01:18<13:17:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4637/12318 [8:01:18<13:17:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4638/12318 [8:01:27<13:17:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4638/12318 [8:01:27<13:17:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4639/12318 [8:01:34<13:17:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4639/12318 [8:01:34<13:17:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4640/12318 [8:01:50<13:17:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4640/12318 [8:01:50<13:17:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4641/12318 [8:01:54<13:17:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4641/12318 [8:01:54<13:17:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4642/12318 [8:01:59<13:17:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4642/12318 [8:01:59<13:17:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4643/12318 [8:02:07<13:16:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4643/12318 [8:02:07<13:16:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4644/12318 [8:02:14<13:16:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4644/12318 [8:02:14<13:16:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4645/12318 [8:02:16<13:16:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4645/12318 [8:02:16<13:16:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4646/12318 [8:02:24<13:16:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4646/12318 [8:02:24<13:16:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4647/12318 [8:02:29<13:16:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4647/12318 [8:02:29<13:16:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4648/12318 [8:02:38<13:16:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4648/12318 [8:02:38<13:16:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4649/12318 [8:02:43<13:16:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4649/12318 [8:02:43<13:16:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4650/12318 [8:02:50<13:16:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4650/12318 [8:02:50<13:16:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4651/12318 [8:02:51<13:15:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4651/12318 [8:02:51<13:15:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4652/12318 [8:02:54<13:15:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4652/12318 [8:02:54<13:15:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4653/12318 [8:02:57<13:15:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4653/12318 [8:02:57<13:15:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4654/12318 [8:03:03<13:15:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4654/12318 [8:03:03<13:15:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4655/12318 [8:03:11<13:15:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4655/12318 [8:03:11<13:15:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4656/12318 [8:03:19<13:15:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4656/12318 [8:03:19<13:15:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4657/12318 [8:03:27<13:15:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4657/12318 [8:03:27<13:15:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4658/12318 [8:03:36<13:15:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4658/12318 [8:03:36<13:15:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4659/12318 [8:03:39<13:15:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4659/12318 [8:03:39<13:15:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4660/12318 [8:03:47<13:15:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4660/12318 [8:03:47<13:15:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4661/12318 [8:03:49<13:14:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4661/12318 [8:03:49<13:14:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4662/12318 [8:03:58<13:14:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4662/12318 [8:03:58<13:14:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4663/12318 [8:04:05<13:14:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4663/12318 [8:04:05<13:14:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4664/12318 [8:04:12<13:14:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4664/12318 [8:04:12<13:14:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4665/12318 [8:04:19<13:14:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4665/12318 [8:04:19<13:14:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4666/12318 [8:04:28<13:14:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4666/12318 [8:04:28<13:14:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4667/12318 [8:04:36<13:14:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4667/12318 [8:04:36<13:14:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4668/12318 [8:04:44<13:14:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4668/12318 [8:04:44<13:14:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4669/12318 [8:04:49<13:14:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4669/12318 [8:04:49<13:14:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4670/12318 [8:04:56<13:14:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4670/12318 [8:04:56<13:14:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4671/12318 [8:05:01<13:14:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4671/12318 [8:05:01<13:14:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4672/12318 [8:05:17<13:14:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4672/12318 [8:05:17<13:14:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4673/12318 [8:05:23<13:14:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4673/12318 [8:05:23<13:14:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4674/12318 [8:05:27<13:13:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4674/12318 [8:05:27<13:13:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4675/12318 [8:05:31<13:13:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4675/12318 [8:05:31<13:13:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4676/12318 [8:05:34<13:13:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4676/12318 [8:05:34<13:13:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4677/12318 [8:05:38<13:13:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4677/12318 [8:05:38<13:13:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4678/12318 [8:05:43<13:13:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4678/12318 [8:05:43<13:13:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4679/12318 [8:05:52<13:13:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4679/12318 [8:05:52<13:13:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4680/12318 [8:05:54<13:13:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4680/12318 [8:05:54<13:13:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4681/12318 [8:05:59<13:12:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4681/12318 [8:05:59<13:12:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4682/12318 [8:06:05<13:12:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4682/12318 [8:06:05<13:12:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4683/12318 [8:06:11<13:12:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4683/12318 [8:06:11<13:12:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4684/12318 [8:06:16<13:12:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4684/12318 [8:06:16<13:12:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4685/12318 [8:06:22<13:12:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4685/12318 [8:06:22<13:12:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4686/12318 [8:06:29<13:12:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4686/12318 [8:06:29<13:12:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4687/12318 [8:06:37<13:12:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4687/12318 [8:06:37<13:12:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4688/12318 [8:06:38<13:12:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4688/12318 [8:06:38<13:12:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4689/12318 [8:06:41<13:11:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4689/12318 [8:06:41<13:11:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4690/12318 [8:06:45<13:11:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4690/12318 [8:06:45<13:11:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4691/12318 [8:06:51<13:11:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4691/12318 [8:06:51<13:11:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4692/12318 [8:06:54<13:11:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4692/12318 [8:06:54<13:11:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4693/12318 [8:07:02<13:11:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4693/12318 [8:07:02<13:11:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4694/12318 [8:07:09<13:11:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4694/12318 [8:07:09<13:11:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4695/12318 [8:07:10<13:11:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4695/12318 [8:07:10<13:11:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4696/12318 [8:07:14<13:10:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4696/12318 [8:07:14<13:10:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4697/12318 [8:07:20<13:10:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4697/12318 [8:07:20<13:10:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4698/12318 [8:07:27<13:10:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4698/12318 [8:07:27<13:10:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4699/12318 [8:07:29<13:10:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4699/12318 [8:07:29<13:10:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4700/12318 [8:07:35<13:10:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4700/12318 [8:07:35<13:10:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4701/12318 [8:07:41<13:10:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4701/12318 [8:07:41<13:10:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4702/12318 [8:07:43<13:09:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4702/12318 [8:07:43<13:09:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4703/12318 [8:07:46<13:09:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4703/12318 [8:07:46<13:09:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4704/12318 [8:08:29<13:10:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4704/12318 [8:08:29<13:10:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4705/12318 [8:08:38<13:10:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4705/12318 [8:08:38<13:10:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4706/12318 [8:08:43<13:10:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4706/12318 [8:08:43<13:10:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4707/12318 [8:08:48<13:10:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4707/12318 [8:08:48<13:10:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4708/12318 [8:08:50<13:10:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4708/12318 [8:08:50<13:10:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4709/12318 [8:08:56<13:10:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4709/12318 [8:08:56<13:10:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4710/12318 [8:09:02<13:09:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4710/12318 [8:09:02<13:09:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4711/12318 [8:09:03<13:09:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4711/12318 [8:09:03<13:09:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4712/12318 [8:09:10<13:09:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4712/12318 [8:09:10<13:09:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4713/12318 [8:09:16<13:09:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4713/12318 [8:09:16<13:09:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4714/12318 [8:09:24<13:09:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4714/12318 [8:09:24<13:09:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4715/12318 [8:09:26<13:09:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4715/12318 [8:09:26<13:09:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4716/12318 [8:09:30<13:09:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4716/12318 [8:09:30<13:09:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4717/12318 [8:09:37<13:08:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4717/12318 [8:09:37<13:08:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4718/12318 [8:09:45<13:08:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4718/12318 [8:09:45<13:08:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4719/12318 [8:09:49<13:08:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4719/12318 [8:09:49<13:08:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4720/12318 [8:09:52<13:08:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4720/12318 [8:09:52<13:08:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4721/12318 [8:09:56<13:08:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4721/12318 [8:09:56<13:08:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4722/12318 [8:10:01<13:08:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4722/12318 [8:10:01<13:08:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4723/12318 [8:10:06<13:08:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4723/12318 [8:10:06<13:08:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4724/12318 [8:10:11<13:08:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4724/12318 [8:10:11<13:08:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4725/12318 [8:10:17<13:07:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4725/12318 [8:10:17<13:07:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4726/12318 [8:10:22<13:07:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4726/12318 [8:10:22<13:07:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4727/12318 [8:10:25<13:07:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4727/12318 [8:10:25<13:07:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4728/12318 [8:10:32<13:07:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4728/12318 [8:10:32<13:07:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4729/12318 [8:10:38<13:07:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4729/12318 [8:10:38<13:07:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4730/12318 [8:10:44<13:07:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4730/12318 [8:10:44<13:07:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4731/12318 [8:10:52<13:07:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4731/12318 [8:10:52<13:07:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4732/12318 [8:11:01<13:07:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4732/12318 [8:11:01<13:07:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4733/12318 [8:11:06<13:07:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4733/12318 [8:11:06<13:07:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4734/12318 [8:11:14<13:06:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4734/12318 [8:11:14<13:06:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4735/12318 [8:11:21<13:06:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4735/12318 [8:11:21<13:06:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4736/12318 [8:11:42<13:07:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4736/12318 [8:11:42<13:07:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4737/12318 [8:11:49<13:07:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4737/12318 [8:11:49<13:07:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4738/12318 [8:11:56<13:07:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4738/12318 [8:11:56<13:07:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4739/12318 [8:12:03<13:06:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4739/12318 [8:12:03<13:06:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4740/12318 [8:12:07<13:06:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4740/12318 [8:12:07<13:06:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4741/12318 [8:12:14<13:06:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4741/12318 [8:12:14<13:06:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  38%|▍| 4742/12318 [8:12:19<13:06:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  38%|▍| 4742/12318 [8:12:19<13:06:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4743/12318 [8:12:28<13:06:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4743/12318 [8:12:28<13:06:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4744/12318 [8:12:36<13:06:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4744/12318 [8:12:36<13:06:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4745/12318 [8:12:39<13:06:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4745/12318 [8:12:39<13:06:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4746/12318 [8:12:45<13:06:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4746/12318 [8:12:45<13:06:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4747/12318 [8:12:47<13:05:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4747/12318 [8:12:47<13:05:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4748/12318 [8:12:49<13:05:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4748/12318 [8:12:49<13:05:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4749/12318 [8:12:52<13:05:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4749/12318 [8:12:52<13:05:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4750/12318 [8:12:53<13:05:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4750/12318 [8:12:53<13:05:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4751/12318 [8:12:57<13:05:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4751/12318 [8:12:57<13:05:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4752/12318 [8:12:58<13:04:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4752/12318 [8:12:58<13:04:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4753/12318 [8:13:06<13:04:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4753/12318 [8:13:06<13:04:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4754/12318 [8:13:11<13:04:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4754/12318 [8:13:11<13:04:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4755/12318 [8:13:18<13:04:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4755/12318 [8:13:18<13:04:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4756/12318 [8:13:25<13:04:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4756/12318 [8:13:25<13:04:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4757/12318 [8:13:31<13:04:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4757/12318 [8:13:31<13:04:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4758/12318 [8:13:40<13:04:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4758/12318 [8:13:40<13:04:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4759/12318 [8:13:46<13:04:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4759/12318 [8:13:46<13:04:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4760/12318 [8:13:50<13:04:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4760/12318 [8:13:50<13:04:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4761/12318 [8:13:53<13:03:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4761/12318 [8:13:53<13:03:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4762/12318 [8:13:58<13:03:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4762/12318 [8:13:58<13:03:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4763/12318 [8:14:06<13:03:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4763/12318 [8:14:06<13:03:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4764/12318 [8:14:11<13:03:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4764/12318 [8:14:11<13:03:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4765/12318 [8:14:14<13:03:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4765/12318 [8:14:14<13:03:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4766/12318 [8:14:18<13:03:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4766/12318 [8:14:18<13:03:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4767/12318 [8:14:23<13:03:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4767/12318 [8:14:23<13:03:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4768/12318 [8:15:03<13:03:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4768/12318 [8:15:03<13:03:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4769/12318 [8:15:07<13:03:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4769/12318 [8:15:07<13:03:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4770/12318 [8:15:15<13:03:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4770/12318 [8:15:15<13:03:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4771/12318 [8:15:23<13:03:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4771/12318 [8:15:23<13:03:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4772/12318 [8:15:25<13:03:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4772/12318 [8:15:25<13:03:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4773/12318 [8:15:29<13:03:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4773/12318 [8:15:29<13:03:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4774/12318 [8:15:34<13:03:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4774/12318 [8:15:34<13:03:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4775/12318 [8:15:39<13:02:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4775/12318 [8:15:39<13:02:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4776/12318 [8:15:47<13:02:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4776/12318 [8:15:47<13:02:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4777/12318 [8:15:52<13:02:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4777/12318 [8:15:52<13:02:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4778/12318 [8:16:00<13:02:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4778/12318 [8:16:00<13:02:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4779/12318 [8:16:02<13:02:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4779/12318 [8:16:02<13:02:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4780/12318 [8:16:08<13:02:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4780/12318 [8:16:08<13:02:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4781/12318 [8:16:11<13:02:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4781/12318 [8:16:11<13:02:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4782/12318 [8:16:19<13:02:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4782/12318 [8:16:19<13:02:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4783/12318 [8:16:25<13:02:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4783/12318 [8:16:25<13:02:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4784/12318 [8:16:31<13:01:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4784/12318 [8:16:31<13:01:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4785/12318 [8:16:39<13:01:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4785/12318 [8:16:39<13:01:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4786/12318 [8:16:46<13:01:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4786/12318 [8:16:46<13:01:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4787/12318 [8:16:53<13:01:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4787/12318 [8:16:53<13:01:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4788/12318 [8:17:02<13:01:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4788/12318 [8:17:02<13:01:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4789/12318 [8:17:07<13:01:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4789/12318 [8:17:07<13:01:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4790/12318 [8:17:10<13:01:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4790/12318 [8:17:10<13:01:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4791/12318 [8:17:11<13:01:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4791/12318 [8:17:11<13:01:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4792/12318 [8:17:14<13:00:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4792/12318 [8:17:14<13:00:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4793/12318 [8:17:16<13:00:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4793/12318 [8:17:16<13:00:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4794/12318 [8:17:20<13:00:33,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4794/12318 [8:17:20<13:00:33,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4795/12318 [8:17:22<13:00:21,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4795/12318 [8:17:22<13:00:21,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4796/12318 [8:17:31<13:00:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4796/12318 [8:17:31<13:00:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4797/12318 [8:17:33<13:00:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4797/12318 [8:17:33<13:00:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4798/12318 [8:17:40<13:00:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4798/12318 [8:17:40<13:00:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4799/12318 [8:17:42<12:59:47,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4799/12318 [8:17:42<12:59:47,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4800/12318 [8:18:18<13:00:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4800/12318 [8:18:18<13:00:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4801/12318 [8:18:43<13:00:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4801/12318 [8:18:43<13:00:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4802/12318 [8:18:50<13:00:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4802/12318 [8:18:50<13:00:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4803/12318 [8:18:52<13:00:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4803/12318 [8:18:52<13:00:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4804/12318 [8:18:57<13:00:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4804/12318 [8:18:57<13:00:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4805/12318 [8:19:00<13:00:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4805/12318 [8:19:00<13:00:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4806/12318 [8:19:05<13:00:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4806/12318 [8:19:05<13:00:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4807/12318 [8:19:14<13:00:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4807/12318 [8:19:14<13:00:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4808/12318 [8:19:20<12:59:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4808/12318 [8:19:20<12:59:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4809/12318 [8:19:25<12:59:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4809/12318 [8:19:25<12:59:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4810/12318 [8:19:34<12:59:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4810/12318 [8:19:34<12:59:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4811/12318 [8:19:40<12:59:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4811/12318 [8:19:40<12:59:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4812/12318 [8:19:46<12:59:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4812/12318 [8:19:46<12:59:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4813/12318 [8:19:55<12:59:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4813/12318 [8:19:55<12:59:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4814/12318 [8:20:03<12:59:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4814/12318 [8:20:03<12:59:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4815/12318 [8:20:09<12:59:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4815/12318 [8:20:09<12:59:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4816/12318 [8:20:12<12:59:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4816/12318 [8:20:12<12:59:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4817/12318 [8:20:15<12:59:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4817/12318 [8:20:15<12:59:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4818/12318 [8:20:19<12:58:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4818/12318 [8:20:19<12:58:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4819/12318 [8:20:22<12:58:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4819/12318 [8:20:22<12:58:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4820/12318 [8:20:24<12:58:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4820/12318 [8:20:24<12:58:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4821/12318 [8:20:26<12:58:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4821/12318 [8:20:26<12:58:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4822/12318 [8:20:34<12:58:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4822/12318 [8:20:34<12:58:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4823/12318 [8:20:41<12:58:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4823/12318 [8:20:41<12:58:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4824/12318 [8:20:50<12:58:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4824/12318 [8:20:50<12:58:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4825/12318 [8:20:52<12:57:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4825/12318 [8:20:52<12:57:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4826/12318 [8:21:00<12:57:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4826/12318 [8:21:00<12:57:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4827/12318 [8:21:04<12:57:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4827/12318 [8:21:04<12:57:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4828/12318 [8:21:10<12:57:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4828/12318 [8:21:10<12:57:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4829/12318 [8:21:11<12:57:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4829/12318 [8:21:11<12:57:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4830/12318 [8:21:20<12:57:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4830/12318 [8:21:20<12:57:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|��| 4831/12318 [8:21:29<12:57:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4831/12318 [8:21:29<12:57:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4832/12318 [8:22:02<12:57:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4832/12318 [8:22:02<12:57:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4833/12318 [8:22:07<12:57:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4833/12318 [8:22:07<12:57:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4834/12318 [8:22:09<12:57:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4834/12318 [8:22:09<12:57:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4835/12318 [8:22:10<12:57:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4835/12318 [8:22:10<12:57:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4836/12318 [8:22:17<12:57:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4836/12318 [8:22:17<12:57:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4837/12318 [8:22:24<12:57:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4837/12318 [8:22:24<12:57:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4838/12318 [8:22:29<12:56:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4838/12318 [8:22:29<12:56:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4839/12318 [8:22:33<12:56:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4839/12318 [8:22:33<12:56:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4840/12318 [8:22:42<12:56:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4840/12318 [8:22:42<12:56:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4841/12318 [8:22:44<12:56:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4841/12318 [8:22:44<12:56:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4842/12318 [8:22:48<12:56:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4842/12318 [8:22:48<12:56:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4843/12318 [8:22:52<12:56:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4843/12318 [8:22:52<12:56:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4844/12318 [8:22:55<12:55:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4844/12318 [8:22:55<12:55:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4845/12318 [8:23:02<12:55:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4845/12318 [8:23:02<12:55:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4846/12318 [8:23:07<12:55:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4846/12318 [8:23:07<12:55:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4847/12318 [8:23:09<12:55:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4847/12318 [8:23:09<12:55:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4848/12318 [8:23:15<12:55:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4848/12318 [8:23:15<12:55:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4849/12318 [8:23:21<12:55:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4849/12318 [8:23:21<12:55:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4850/12318 [8:23:22<12:55:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4850/12318 [8:23:22<12:55:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4851/12318 [8:23:26<12:54:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4851/12318 [8:23:26<12:54:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4852/12318 [8:23:33<12:54:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4852/12318 [8:23:33<12:54:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4853/12318 [8:23:40<12:54:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4853/12318 [8:23:40<12:54:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4854/12318 [8:23:42<12:54:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4854/12318 [8:23:42<12:54:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4855/12318 [8:23:51<12:54:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4855/12318 [8:23:51<12:54:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4856/12318 [8:23:56<12:54:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4856/12318 [8:23:56<12:54:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4857/12318 [8:24:03<12:54:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4857/12318 [8:24:03<12:54:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4858/12318 [8:24:08<12:54:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4858/12318 [8:24:08<12:54:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4859/12318 [8:24:14<12:54:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4859/12318 [8:24:14<12:54:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4860/12318 [8:24:19<12:53:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4860/12318 [8:24:19<12:53:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4861/12318 [8:24:26<12:53:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4861/12318 [8:24:26<12:53:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4862/12318 [8:24:32<12:53:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4862/12318 [8:24:32<12:53:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4863/12318 [8:24:36<12:53:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4863/12318 [8:24:36<12:53:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4864/12318 [8:25:08<12:54:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4864/12318 [8:25:08<12:54:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  39%|▍| 4865/12318 [8:25:16<12:54:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  39%|▍| 4865/12318 [8:25:16<12:54:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4866/12318 [8:25:21<12:53:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4866/12318 [8:25:21<12:53:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4867/12318 [8:25:24<12:53:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4867/12318 [8:25:24<12:53:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4868/12318 [8:25:30<12:53:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4868/12318 [8:25:30<12:53:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4869/12318 [8:25:37<12:53:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4869/12318 [8:25:37<12:53:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4870/12318 [8:25:40<12:53:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4870/12318 [8:25:40<12:53:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4871/12318 [8:25:45<12:53:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4871/12318 [8:25:45<12:53:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4872/12318 [8:25:48<12:53:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4872/12318 [8:25:48<12:53:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4873/12318 [8:25:55<12:52:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4873/12318 [8:25:55<12:52:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4874/12318 [8:25:59<12:52:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4874/12318 [8:25:59<12:52:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4875/12318 [8:26:02<12:52:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4875/12318 [8:26:02<12:52:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4876/12318 [8:26:07<12:52:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4876/12318 [8:26:07<12:52:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4877/12318 [8:26:15<12:52:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4877/12318 [8:26:15<12:52:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4878/12318 [8:26:19<12:52:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4878/12318 [8:26:19<12:52:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4879/12318 [8:26:23<12:52:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4879/12318 [8:26:23<12:52:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4880/12318 [8:26:30<12:52:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4880/12318 [8:26:30<12:52:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4881/12318 [8:26:37<12:51:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4881/12318 [8:26:37<12:51:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4882/12318 [8:26:40<12:51:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4882/12318 [8:26:40<12:51:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4883/12318 [8:26:44<12:51:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4883/12318 [8:26:44<12:51:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4884/12318 [8:26:48<12:51:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4884/12318 [8:26:48<12:51:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4885/12318 [8:26:52<12:51:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4885/12318 [8:26:52<12:51:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4886/12318 [8:26:59<12:51:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4886/12318 [8:26:59<12:51:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4887/12318 [8:27:05<12:51:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4887/12318 [8:27:05<12:51:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4888/12318 [8:27:13<12:51:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4888/12318 [8:27:13<12:51:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4889/12318 [8:27:18<12:50:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4889/12318 [8:27:18<12:50:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4890/12318 [8:27:23<12:50:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4890/12318 [8:27:23<12:50:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4891/12318 [8:27:31<12:50:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4891/12318 [8:27:31<12:50:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4892/12318 [8:27:40<12:50:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4892/12318 [8:27:40<12:50:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4893/12318 [8:27:47<12:50:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4893/12318 [8:27:47<12:50:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4894/12318 [8:27:50<12:50:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4894/12318 [8:27:50<12:50:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4895/12318 [8:27:57<12:50:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4895/12318 [8:27:57<12:50:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4896/12318 [8:28:40<12:51:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4896/12318 [8:28:40<12:51:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4897/12318 [8:28:45<12:50:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4897/12318 [8:28:45<12:50:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4898/12318 [8:28:52<12:50:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4898/12318 [8:28:52<12:50:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4899/12318 [8:28:57<12:50:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4899/12318 [8:28:57<12:50:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4900/12318 [8:29:05<12:50:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4900/12318 [8:29:05<12:50:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4901/12318 [8:29:12<12:50:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4901/12318 [8:29:12<12:50:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4902/12318 [8:29:15<12:50:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4902/12318 [8:29:15<12:50:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4903/12318 [8:29:19<12:50:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4903/12318 [8:29:19<12:50:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4904/12318 [8:29:27<12:50:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4904/12318 [8:29:27<12:50:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4905/12318 [8:29:33<12:50:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4905/12318 [8:29:33<12:50:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4906/12318 [8:29:37<12:49:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4906/12318 [8:29:37<12:49:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4907/12318 [8:29:41<12:49:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4907/12318 [8:29:41<12:49:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4908/12318 [8:29:49<12:49:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4908/12318 [8:29:49<12:49:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4909/12318 [8:29:56<12:49:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4909/12318 [8:29:56<12:49:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4910/12318 [8:30:02<12:49:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4910/12318 [8:30:02<12:49:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4911/12318 [8:30:08<12:49:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4911/12318 [8:30:08<12:49:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4912/12318 [8:30:14<12:49:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4912/12318 [8:30:14<12:49:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4913/12318 [8:30:18<12:49:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4913/12318 [8:30:18<12:49:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4914/12318 [8:30:26<12:49:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4914/12318 [8:30:26<12:49:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4915/12318 [8:30:29<12:48:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4915/12318 [8:30:29<12:48:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4916/12318 [8:30:32<12:48:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4916/12318 [8:30:32<12:48:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4917/12318 [8:30:38<12:48:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4917/12318 [8:30:38<12:48:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4918/12318 [8:30:41<12:48:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4918/12318 [8:30:41<12:48:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4919/12318 [8:30:47<12:48:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4919/12318 [8:30:47<12:48:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4920/12318 [8:30:50<12:48:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4920/12318 [8:30:50<12:48:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4921/12318 [8:30:56<12:48:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4921/12318 [8:30:56<12:48:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4922/12318 [8:30:58<12:47:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4922/12318 [8:30:58<12:47:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4923/12318 [8:31:06<12:47:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4923/12318 [8:31:06<12:47:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4924/12318 [8:31:09<12:47:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4924/12318 [8:31:09<12:47:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4925/12318 [8:31:18<12:47:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4925/12318 [8:31:18<12:47:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4926/12318 [8:31:26<12:47:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4926/12318 [8:31:26<12:47:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4927/12318 [8:31:27<12:47:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4927/12318 [8:31:27<12:47:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4928/12318 [8:31:55<12:47:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4928/12318 [8:31:55<12:47:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4929/12318 [8:32:03<12:47:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4929/12318 [8:32:03<12:47:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4930/12318 [8:32:06<12:47:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4930/12318 [8:32:06<12:47:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4931/12318 [8:32:13<12:47:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4931/12318 [8:32:13<12:47:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4932/12318 [8:32:18<12:47:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4932/12318 [8:32:18<12:47:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4933/12318 [8:32:23<12:47:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4933/12318 [8:32:23<12:47:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4934/12318 [8:32:28<12:46:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4934/12318 [8:32:28<12:46:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4935/12318 [8:32:35<12:46:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4935/12318 [8:32:35<12:46:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4936/12318 [8:32:40<12:46:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4936/12318 [8:32:40<12:46:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4937/12318 [8:32:48<12:46:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4937/12318 [8:32:48<12:46:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4938/12318 [8:32:56<12:46:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4938/12318 [8:32:56<12:46:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4939/12318 [8:33:04<12:46:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4939/12318 [8:33:04<12:46:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4940/12318 [8:33:08<12:46:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4940/12318 [8:33:08<12:46:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4941/12318 [8:33:11<12:46:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4941/12318 [8:33:11<12:46:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4942/12318 [8:33:19<12:46:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4942/12318 [8:33:19<12:46:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4943/12318 [8:33:25<12:46:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4943/12318 [8:33:25<12:46:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4944/12318 [8:33:33<12:45:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4944/12318 [8:33:33<12:45:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4945/12318 [8:33:36<12:45:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4945/12318 [8:33:36<12:45:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4946/12318 [8:33:40<12:45:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4946/12318 [8:33:40<12:45:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4947/12318 [8:33:46<12:45:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4947/12318 [8:33:46<12:45:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4948/12318 [8:33:54<12:45:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4948/12318 [8:33:54<12:45:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4949/12318 [8:34:01<12:45:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4949/12318 [8:34:01<12:45:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4950/12318 [8:34:07<12:45:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4950/12318 [8:34:07<12:45:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4951/12318 [8:34:14<12:45:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4951/12318 [8:34:14<12:45:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4952/12318 [8:34:21<12:45:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4952/12318 [8:34:21<12:45:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4953/12318 [8:34:29<12:45:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4953/12318 [8:34:29<12:45:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4954/12318 [8:34:32<12:44:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4954/12318 [8:34:32<12:44:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4955/12318 [8:34:35<12:44:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4955/12318 [8:34:35<12:44:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4956/12318 [8:34:43<12:44:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4956/12318 [8:34:43<12:44:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4957/12318 [8:34:50<12:44:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4957/12318 [8:34:50<12:44:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4958/12318 [8:34:57<12:44:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4958/12318 [8:34:57<12:44:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4959/12318 [8:35:04<12:44:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4959/12318 [8:35:04<12:44:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4960/12318 [8:35:21<12:44:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4960/12318 [8:35:21<12:44:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4961/12318 [8:35:24<12:44:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4961/12318 [8:35:24<12:44:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4962/12318 [8:35:28<12:44:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4962/12318 [8:35:28<12:44:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4963/12318 [8:35:30<12:43:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4963/12318 [8:35:30<12:43:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4964/12318 [8:35:37<12:43:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4964/12318 [8:35:37<12:43:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4965/12318 [8:35:45<12:43:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4965/12318 [8:35:45<12:43:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4966/12318 [8:35:46<12:43:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4966/12318 [8:35:46<12:43:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4967/12318 [8:35:54<12:43:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4967/12318 [8:35:54<12:43:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4968/12318 [8:36:00<12:43:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4968/12318 [8:36:00<12:43:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4969/12318 [8:36:02<12:43:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4969/12318 [8:36:02<12:43:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4970/12318 [8:36:04<12:43:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4970/12318 [8:36:04<12:43:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4971/12318 [8:36:09<12:42:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4971/12318 [8:36:09<12:42:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4972/12318 [8:36:14<12:42:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4972/12318 [8:36:14<12:42:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4973/12318 [8:36:16<12:42:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4973/12318 [8:36:16<12:42:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4974/12318 [8:36:24<12:42:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4974/12318 [8:36:24<12:42:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4975/12318 [8:36:30<12:42:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4975/12318 [8:36:30<12:42:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4976/12318 [8:36:34<12:42:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4976/12318 [8:36:34<12:42:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4977/12318 [8:36:40<12:42:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4977/12318 [8:36:40<12:42:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4978/12318 [8:36:46<12:41:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4978/12318 [8:36:46<12:41:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4979/12318 [8:36:50<12:41:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4979/12318 [8:36:50<12:41:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4980/12318 [8:36:56<12:41:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4980/12318 [8:36:56<12:41:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4981/12318 [8:37:04<12:41:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4981/12318 [8:37:04<12:41:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4982/12318 [8:37:08<12:41:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4982/12318 [8:37:08<12:41:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4983/12318 [8:37:10<12:41:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4983/12318 [8:37:10<12:41:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4984/12318 [8:37:18<12:41:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4984/12318 [8:37:18<12:41:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4985/12318 [8:37:20<12:41:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4985/12318 [8:37:20<12:41:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4986/12318 [8:37:26<12:40:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4986/12318 [8:37:26<12:40:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4987/12318 [8:37:33<12:40:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4987/12318 [8:37:33<12:40:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  40%|▍| 4988/12318 [8:37:40<12:40:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  40%|▍| 4988/12318 [8:37:40<12:40:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4989/12318 [8:37:42<12:40:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4989/12318 [8:37:42<12:40:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4990/12318 [8:37:47<12:40:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4990/12318 [8:37:47<12:40:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4991/12318 [8:37:50<12:40:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4991/12318 [8:37:50<12:40:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4992/12318 [8:38:32<12:40:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4992/12318 [8:38:32<12:40:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4993/12318 [8:38:37<12:40:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4993/12318 [8:38:37<12:40:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4994/12318 [8:38:41<12:40:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4994/12318 [8:38:41<12:40:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4995/12318 [8:38:45<12:40:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4995/12318 [8:38:45<12:40:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4996/12318 [8:38:48<12:40:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4996/12318 [8:38:48<12:40:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4997/12318 [8:38:57<12:40:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4997/12318 [8:38:57<12:40:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4998/12318 [8:39:02<12:40:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4998/12318 [8:39:02<12:40:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 4999/12318 [8:39:03<12:39:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 4999/12318 [8:39:03<12:39:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5000/12318 [8:39:12<12:39:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5000/12318 [8:39:12<12:39:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5001/12318 [8:39:19<12:39:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5001/12318 [8:39:19<12:39:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5002/12318 [8:39:27<12:39:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5002/12318 [8:39:27<12:39:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5003/12318 [8:39:30<12:39:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5003/12318 [8:39:30<12:39:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5004/12318 [8:39:36<12:39:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5004/12318 [8:39:36<12:39:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5005/12318 [8:39:42<12:39:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5005/12318 [8:39:42<12:39:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5006/12318 [8:39:47<12:39:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5006/12318 [8:39:47<12:39:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5007/12318 [8:39:51<12:39:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5007/12318 [8:39:51<12:39:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5008/12318 [8:39:57<12:38:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5008/12318 [8:39:57<12:38:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5009/12318 [8:39:58<12:38:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5009/12318 [8:39:58<12:38:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5010/12318 [8:40:07<12:38:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5010/12318 [8:40:07<12:38:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5011/12318 [8:40:13<12:38:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5011/12318 [8:40:13<12:38:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5012/12318 [8:40:20<12:38:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5012/12318 [8:40:20<12:38:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5013/12318 [8:40:24<12:38:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5013/12318 [8:40:24<12:38:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5014/12318 [8:40:30<12:38:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5014/12318 [8:40:30<12:38:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5015/12318 [8:40:39<12:38:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5015/12318 [8:40:39<12:38:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5016/12318 [8:40:44<12:38:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5016/12318 [8:40:44<12:38:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5017/12318 [8:40:48<12:37:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5017/12318 [8:40:48<12:37:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5018/12318 [8:40:55<12:37:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5018/12318 [8:40:55<12:37:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5019/12318 [8:40:56<12:37:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5019/12318 [8:40:56<12:37:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5020/12318 [8:41:00<12:37:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5020/12318 [8:41:00<12:37:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5021/12318 [8:41:04<12:37:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5021/12318 [8:41:04<12:37:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5022/12318 [8:41:07<12:37:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5022/12318 [8:41:07<12:37:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5023/12318 [8:41:12<12:36:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5023/12318 [8:41:12<12:36:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5024/12318 [8:41:52<12:37:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5024/12318 [8:41:52<12:37:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5025/12318 [8:41:59<12:37:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5025/12318 [8:41:59<12:37:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5026/12318 [8:42:05<12:37:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5026/12318 [8:42:05<12:37:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5027/12318 [8:42:10<12:37:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5027/12318 [8:42:10<12:37:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5028/12318 [8:42:16<12:37:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5028/12318 [8:42:16<12:37:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5029/12318 [8:42:24<12:37:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5029/12318 [8:42:24<12:37:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5030/12318 [8:42:27<12:37:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5030/12318 [8:42:27<12:37:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5031/12318 [8:42:35<12:36:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5031/12318 [8:42:35<12:36:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5032/12318 [8:42:41<12:36:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5032/12318 [8:42:41<12:36:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5033/12318 [8:42:46<12:36:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5033/12318 [8:42:46<12:36:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5034/12318 [8:42:52<12:36:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5034/12318 [8:42:52<12:36:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5035/12318 [8:43:01<12:36:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5035/12318 [8:43:01<12:36:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5036/12318 [8:43:07<12:36:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5036/12318 [8:43:07<12:36:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5037/12318 [8:43:12<12:36:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5037/12318 [8:43:12<12:36:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5038/12318 [8:43:18<12:36:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5038/12318 [8:43:18<12:36:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5039/12318 [8:43:19<12:35:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5039/12318 [8:43:19<12:35:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5040/12318 [8:43:27<12:35:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5040/12318 [8:43:27<12:35:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5041/12318 [8:43:33<12:35:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5041/12318 [8:43:33<12:35:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5042/12318 [8:43:42<12:35:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5042/12318 [8:43:42<12:35:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5043/12318 [8:43:49<12:35:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5043/12318 [8:43:49<12:35:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5044/12318 [8:43:51<12:35:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5044/12318 [8:43:51<12:35:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5045/12318 [8:43:55<12:35:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5045/12318 [8:43:55<12:35:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5046/12318 [8:44:02<12:35:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5046/12318 [8:44:02<12:35:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5047/12318 [8:44:09<12:35:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5047/12318 [8:44:09<12:35:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5048/12318 [8:44:16<12:35:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5048/12318 [8:44:16<12:35:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5049/12318 [8:44:21<12:34:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5049/12318 [8:44:21<12:34:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5050/12318 [8:44:26<12:34:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5050/12318 [8:44:26<12:34:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5051/12318 [8:44:30<12:34:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5051/12318 [8:44:30<12:34:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5052/12318 [8:44:38<12:34:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5052/12318 [8:44:38<12:34:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5053/12318 [8:44:44<12:34:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5053/12318 [8:44:44<12:34:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5054/12318 [8:44:46<12:34:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5054/12318 [8:44:46<12:34:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5055/12318 [8:44:53<12:34:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5055/12318 [8:44:53<12:34:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5056/12318 [8:45:10<12:34:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5056/12318 [8:45:10<12:34:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5057/12318 [8:45:19<12:34:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5057/12318 [8:45:19<12:34:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5058/12318 [8:45:25<12:34:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5058/12318 [8:45:25<12:34:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5059/12318 [8:45:34<12:34:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5059/12318 [8:45:34<12:34:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5060/12318 [8:45:42<12:34:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5060/12318 [8:45:42<12:34:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5061/12318 [8:45:50<12:34:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5061/12318 [8:45:50<12:34:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5062/12318 [8:45:55<12:33:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5062/12318 [8:45:55<12:33:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5063/12318 [8:46:03<12:33:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5063/12318 [8:46:03<12:33:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5064/12318 [8:46:10<12:33:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5064/12318 [8:46:10<12:33:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5065/12318 [8:46:16<12:33:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5065/12318 [8:46:16<12:33:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5066/12318 [8:46:21<12:33:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5066/12318 [8:46:21<12:33:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5067/12318 [8:46:24<12:33:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5067/12318 [8:46:24<12:33:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5068/12318 [8:46:29<12:33:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5068/12318 [8:46:29<12:33:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5069/12318 [8:46:37<12:33:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5069/12318 [8:46:37<12:33:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5070/12318 [8:46:45<12:33:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5070/12318 [8:46:45<12:33:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5071/12318 [8:46:53<12:32:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5071/12318 [8:46:53<12:32:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5072/12318 [8:46:59<12:32:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5072/12318 [8:46:59<12:32:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5073/12318 [8:47:05<12:32:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5073/12318 [8:47:05<12:32:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5074/12318 [8:47:12<12:32:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5074/12318 [8:47:12<12:32:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5075/12318 [8:47:14<12:32:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5075/12318 [8:47:14<12:32:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5076/12318 [8:47:21<12:32:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5076/12318 [8:47:21<12:32:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5077/12318 [8:47:26<12:32:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5077/12318 [8:47:26<12:32:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5078/12318 [8:47:33<12:32:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5078/12318 [8:47:33<12:32:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5079/12318 [8:47:34<12:31:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5079/12318 [8:47:34<12:31:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5080/12318 [8:47:39<12:31:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5080/12318 [8:47:39<12:31:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5081/12318 [8:47:44<12:31:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5081/12318 [8:47:44<12:31:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5082/12318 [8:47:48<12:31:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5082/12318 [8:47:48<12:31:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5083/12318 [8:47:56<12:31:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5083/12318 [8:47:56<12:31:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5084/12318 [8:47:58<12:31:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5084/12318 [8:47:58<12:31:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5085/12318 [8:48:04<12:31:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5085/12318 [8:48:04<12:31:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5086/12318 [8:48:06<12:30:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5086/12318 [8:48:06<12:30:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5087/12318 [8:48:11<12:30:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5087/12318 [8:48:11<12:30:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5088/12318 [8:48:25<12:30:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5088/12318 [8:48:25<12:30:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5089/12318 [8:48:32<12:30:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5089/12318 [8:48:33<12:30:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5090/12318 [8:48:39<12:30:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5090/12318 [8:48:39<12:30:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5091/12318 [8:48:45<12:30:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5091/12318 [8:48:45<12:30:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5092/12318 [8:48:49<12:30:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5092/12318 [8:48:49<12:30:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5093/12318 [8:48:52<12:30:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5093/12318 [8:48:52<12:30:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5094/12318 [8:48:58<12:30:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5094/12318 [8:48:58<12:30:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5095/12318 [8:49:00<12:29:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5095/12318 [8:49:00<12:29:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5096/12318 [8:49:01<12:29:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5096/12318 [8:49:01<12:29:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5097/12318 [8:49:10<12:29:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5097/12318 [8:49:10<12:29:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5098/12318 [8:49:15<12:29:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5098/12318 [8:49:15<12:29:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5099/12318 [8:49:19<12:29:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5099/12318 [8:49:19<12:29:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5100/12318 [8:49:24<12:29:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5100/12318 [8:49:24<12:29:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5101/12318 [8:49:28<12:29:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5101/12318 [8:49:28<12:29:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5102/12318 [8:49:35<12:29:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5102/12318 [8:49:35<12:29:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5103/12318 [8:49:38<12:28:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5103/12318 [8:49:38<12:28:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5104/12318 [8:49:44<12:28:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5104/12318 [8:49:44<12:28:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5105/12318 [8:49:49<12:28:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5105/12318 [8:49:49<12:28:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5106/12318 [8:49:54<12:28:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5106/12318 [8:49:54<12:28:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5107/12318 [8:50:03<12:28:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5107/12318 [8:50:03<12:28:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5108/12318 [8:50:05<12:28:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5108/12318 [8:50:05<12:28:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5109/12318 [8:50:12<12:28:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5109/12318 [8:50:12<12:28:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5110/12318 [8:50:16<12:27:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5110/12318 [8:50:16<12:27:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  41%|▍| 5111/12318 [8:50:22<12:27:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  41%|▍| 5111/12318 [8:50:22<12:27:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5112/12318 [8:50:27<12:27:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5112/12318 [8:50:27<12:27:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5113/12318 [8:50:30<12:27:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5113/12318 [8:50:30<12:27:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5114/12318 [8:50:34<12:27:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5114/12318 [8:50:34<12:27:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5115/12318 [8:50:35<12:27:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5115/12318 [8:50:35<12:27:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5116/12318 [8:50:38<12:27:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5116/12318 [8:50:38<12:27:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5117/12318 [8:50:47<12:26:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5117/12318 [8:50:47<12:26:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5118/12318 [8:50:56<12:26:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5118/12318 [8:50:56<12:26:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5119/12318 [8:51:05<12:26:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5119/12318 [8:51:05<12:26:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5120/12318 [8:51:39<12:27:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5120/12318 [8:51:39<12:27:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5121/12318 [8:51:46<12:27:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5121/12318 [8:51:46<12:27:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5122/12318 [8:51:52<12:27:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5122/12318 [8:51:52<12:27:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5123/12318 [8:51:56<12:27:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5123/12318 [8:51:56<12:27:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5124/12318 [8:52:02<12:26:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5124/12318 [8:52:02<12:26:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5125/12318 [8:52:09<12:26:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5125/12318 [8:52:09<12:26:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5126/12318 [8:52:16<12:26:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5126/12318 [8:52:16<12:26:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5127/12318 [8:52:23<12:26:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5127/12318 [8:52:23<12:26:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5128/12318 [8:52:27<12:26:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5128/12318 [8:52:27<12:26:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5129/12318 [8:52:36<12:26:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5129/12318 [8:52:36<12:26:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5130/12318 [8:52:43<12:26:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5130/12318 [8:52:43<12:26:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5131/12318 [8:52:49<12:26:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5131/12318 [8:52:49<12:26:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5132/12318 [8:52:51<12:26:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5132/12318 [8:52:51<12:26:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5133/12318 [8:52:58<12:26:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5133/12318 [8:52:58<12:26:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5134/12318 [8:53:02<12:25:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5134/12318 [8:53:02<12:25:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5135/12318 [8:53:08<12:25:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5135/12318 [8:53:08<12:25:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5136/12318 [8:53:15<12:25:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5136/12318 [8:53:15<12:25:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5137/12318 [8:53:21<12:25:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5137/12318 [8:53:21<12:25:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5138/12318 [8:53:26<12:25:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5138/12318 [8:53:26<12:25:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5139/12318 [8:53:31<12:25:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5139/12318 [8:53:31<12:25:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5140/12318 [8:53:33<12:25:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5140/12318 [8:53:33<12:25:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5141/12318 [8:53:38<12:24:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5141/12318 [8:53:38<12:24:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5142/12318 [8:53:40<12:24:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5142/12318 [8:53:40<12:24:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5143/12318 [8:53:45<12:24:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5143/12318 [8:53:45<12:24:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5144/12318 [8:53:53<12:24:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5144/12318 [8:53:53<12:24:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5145/12318 [8:54:01<12:24:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5145/12318 [8:54:01<12:24:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5146/12318 [8:54:03<12:24:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5146/12318 [8:54:03<12:24:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5147/12318 [8:54:12<12:24:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5147/12318 [8:54:12<12:24:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5148/12318 [8:54:18<12:24:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5148/12318 [8:54:18<12:24:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5149/12318 [8:54:27<12:24:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5149/12318 [8:54:27<12:24:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5150/12318 [8:54:33<12:24:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5150/12318 [8:54:33<12:24:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5151/12318 [8:54:37<12:23:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5151/12318 [8:54:37<12:23:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5152/12318 [8:54:59<12:24:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5152/12318 [8:54:59<12:24:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5153/12318 [8:55:04<12:23:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5153/12318 [8:55:04<12:23:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5154/12318 [8:55:12<12:23:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5154/12318 [8:55:12<12:23:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5155/12318 [8:55:21<12:23:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5155/12318 [8:55:21<12:23:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5156/12318 [8:55:26<12:23:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5156/12318 [8:55:26<12:23:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5157/12318 [8:55:32<12:23:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5157/12318 [8:55:32<12:23:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5158/12318 [8:55:37<12:23:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5158/12318 [8:55:37<12:23:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5159/12318 [8:55:46<12:23:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5159/12318 [8:55:46<12:23:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5160/12318 [8:55:52<12:23:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5160/12318 [8:55:52<12:23:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5161/12318 [8:56:00<12:23:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5161/12318 [8:56:00<12:23:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5162/12318 [8:56:04<12:23:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5162/12318 [8:56:04<12:23:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5163/12318 [8:56:10<12:23:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5163/12318 [8:56:10<12:23:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5164/12318 [8:56:18<12:22:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5164/12318 [8:56:18<12:22:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5165/12318 [8:56:23<12:22:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5165/12318 [8:56:23<12:22:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5166/12318 [8:56:25<12:22:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5166/12318 [8:56:25<12:22:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5167/12318 [8:56:28<12:22:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5167/12318 [8:56:28<12:22:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5168/12318 [8:56:37<12:22:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5168/12318 [8:56:37<12:22:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5169/12318 [8:56:43<12:22:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5169/12318 [8:56:43<12:22:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5170/12318 [8:56:48<12:22:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5170/12318 [8:56:48<12:22:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5171/12318 [8:56:52<12:22:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5171/12318 [8:56:52<12:22:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5172/12318 [8:56:53<12:21:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5172/12318 [8:56:53<12:21:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5173/12318 [8:57:00<12:21:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5173/12318 [8:57:00<12:21:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5174/12318 [8:57:05<12:21:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5174/12318 [8:57:05<12:21:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5175/12318 [8:57:14<12:21:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5175/12318 [8:57:14<12:21:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5176/12318 [8:57:20<12:21:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5176/12318 [8:57:20<12:21:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5177/12318 [8:57:23<12:21:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5177/12318 [8:57:23<12:21:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5178/12318 [8:57:32<12:21:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5178/12318 [8:57:32<12:21:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5179/12318 [8:57:38<12:21:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5179/12318 [8:57:38<12:21:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5180/12318 [8:57:42<12:20:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5180/12318 [8:57:42<12:20:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5181/12318 [8:57:48<12:20:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5181/12318 [8:57:48<12:20:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5182/12318 [8:57:55<12:20:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5182/12318 [8:57:55<12:20:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5183/12318 [8:58:00<12:20:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5183/12318 [8:58:00<12:20:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5184/12318 [8:58:17<12:20:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5184/12318 [8:58:17<12:20:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5185/12318 [8:58:25<12:20:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5185/12318 [8:58:25<12:20:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5186/12318 [8:58:30<12:20:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5186/12318 [8:58:30<12:20:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5187/12318 [8:58:35<12:20:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5187/12318 [8:58:35<12:20:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5188/12318 [8:58:41<12:20:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5188/12318 [8:58:41<12:20:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5189/12318 [8:58:46<12:20:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5189/12318 [8:58:46<12:20:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5190/12318 [8:58:48<12:19:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5190/12318 [8:58:48<12:19:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5191/12318 [8:58:53<12:19:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5191/12318 [8:58:53<12:19:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5192/12318 [8:59:00<12:19:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5192/12318 [8:59:00<12:19:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5193/12318 [8:59:09<12:19:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5193/12318 [8:59:09<12:19:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5194/12318 [8:59:13<12:19:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5194/12318 [8:59:13<12:19:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5195/12318 [8:59:22<12:19:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5195/12318 [8:59:22<12:19:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5196/12318 [8:59:27<12:19:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5196/12318 [8:59:27<12:19:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5197/12318 [8:59:35<12:19:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5197/12318 [8:59:35<12:19:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5198/12318 [8:59:41<12:19:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5198/12318 [8:59:41<12:19:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5199/12318 [8:59:49<12:19:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5199/12318 [8:59:49<12:19:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5200/12318 [8:59:54<12:19:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5200/12318 [8:59:54<12:19:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5201/12318 [9:00:03<12:19:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5201/12318 [9:00:03<12:19:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5202/12318 [9:00:08<12:18:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5202/12318 [9:00:08<12:18:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5203/12318 [9:00:10<12:18:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5203/12318 [9:00:10<12:18:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5204/12318 [9:00:11<12:18:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5204/12318 [9:00:11<12:18:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5205/12318 [9:00:19<12:18:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5205/12318 [9:00:19<12:18:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5206/12318 [9:00:25<12:18:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5206/12318 [9:00:25<12:18:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5207/12318 [9:00:28<12:18:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5207/12318 [9:00:28<12:18:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5208/12318 [9:00:34<12:18:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5208/12318 [9:00:34<12:18:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5209/12318 [9:00:43<12:17:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5209/12318 [9:00:43<12:17:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5210/12318 [9:00:46<12:17:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5210/12318 [9:00:46<12:17:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5211/12318 [9:00:48<12:17:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5211/12318 [9:00:48<12:17:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5212/12318 [9:00:53<12:17:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5212/12318 [9:00:53<12:17:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5213/12318 [9:00:57<12:17:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5213/12318 [9:00:57<12:17:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5214/12318 [9:00:59<12:17:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5214/12318 [9:00:59<12:17:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5215/12318 [9:01:07<12:17:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5215/12318 [9:01:07<12:17:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5216/12318 [9:01:41<12:17:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5216/12318 [9:01:41<12:17:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5217/12318 [9:01:46<12:17:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5217/12318 [9:01:46<12:17:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5218/12318 [9:01:54<12:17:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5218/12318 [9:01:54<12:17:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5219/12318 [9:01:57<12:17:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5219/12318 [9:01:57<12:17:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5220/12318 [9:02:01<12:17:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5220/12318 [9:02:01<12:17:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5221/12318 [9:02:05<12:16:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5221/12318 [9:02:05<12:16:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5222/12318 [9:02:11<12:16:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5222/12318 [9:02:11<12:16:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5223/12318 [9:02:20<12:16:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5223/12318 [9:02:20<12:16:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5224/12318 [9:02:24<12:16:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5224/12318 [9:02:24<12:16:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5225/12318 [9:02:33<12:16:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5225/12318 [9:02:33<12:16:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5226/12318 [9:02:39<12:16:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5226/12318 [9:02:39<12:16:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5227/12318 [9:02:44<12:16:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5227/12318 [9:02:44<12:16:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5228/12318 [9:02:49<12:16:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5228/12318 [9:02:49<12:16:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5229/12318 [9:02:57<12:16:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5229/12318 [9:02:57<12:16:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5230/12318 [9:03:05<12:16:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5230/12318 [9:03:05<12:16:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5231/12318 [9:03:06<12:15:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5231/12318 [9:03:06<12:15:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5232/12318 [9:03:14<12:15:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5232/12318 [9:03:14<12:15:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5233/12318 [9:03:15<12:15:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5233/12318 [9:03:15<12:15:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5234/12318 [9:03:16<12:15:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5234/12318 [9:03:16<12:15:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  42%|▍| 5235/12318 [9:03:18<12:15:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  42%|▍| 5235/12318 [9:03:18<12:15:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5236/12318 [9:03:22<12:14:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5236/12318 [9:03:22<12:14:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5237/12318 [9:03:26<12:14:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5237/12318 [9:03:26<12:14:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5238/12318 [9:03:34<12:14:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5238/12318 [9:03:34<12:14:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5239/12318 [9:03:40<12:14:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5239/12318 [9:03:40<12:14:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5240/12318 [9:03:48<12:14:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5240/12318 [9:03:48<12:14:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5241/12318 [9:03:53<12:14:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5241/12318 [9:03:53<12:14:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5242/12318 [9:04:02<12:14:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5242/12318 [9:04:02<12:14:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5243/12318 [9:04:06<12:14:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5243/12318 [9:04:06<12:14:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5244/12318 [9:04:15<12:14:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5244/12318 [9:04:15<12:14:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5245/12318 [9:04:24<12:14:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5245/12318 [9:04:24<12:14:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5246/12318 [9:04:30<12:14:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5246/12318 [9:04:30<12:14:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5247/12318 [9:04:32<12:13:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5247/12318 [9:04:32<12:13:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5248/12318 [9:05:11<12:14:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5248/12318 [9:05:11<12:14:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5249/12318 [9:05:16<12:14:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5249/12318 [9:05:16<12:14:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5250/12318 [9:05:24<12:14:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5250/12318 [9:05:24<12:14:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5251/12318 [9:05:25<12:14:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5251/12318 [9:05:25<12:14:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5252/12318 [9:05:26<12:13:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5252/12318 [9:05:26<12:13:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5253/12318 [9:05:32<12:13:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5253/12318 [9:05:32<12:13:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5254/12318 [9:05:38<12:13:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5254/12318 [9:05:38<12:13:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5255/12318 [9:05:41<12:13:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5255/12318 [9:05:41<12:13:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5256/12318 [9:05:44<12:13:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5256/12318 [9:05:44<12:13:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5257/12318 [9:05:53<12:13:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5257/12318 [9:05:53<12:13:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5258/12318 [9:05:57<12:13:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5258/12318 [9:05:57<12:13:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5259/12318 [9:06:00<12:12:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5259/12318 [9:06:00<12:12:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5260/12318 [9:06:05<12:12:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5260/12318 [9:06:05<12:12:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5261/12318 [9:06:07<12:12:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5261/12318 [9:06:07<12:12:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5262/12318 [9:06:12<12:12:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5262/12318 [9:06:12<12:12:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5263/12318 [9:06:21<12:12:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5263/12318 [9:06:21<12:12:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5264/12318 [9:06:27<12:12:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5264/12318 [9:06:27<12:12:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5265/12318 [9:06:29<12:12:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5265/12318 [9:06:29<12:12:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5266/12318 [9:06:34<12:11:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5266/12318 [9:06:34<12:11:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5267/12318 [9:06:39<12:11:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5267/12318 [9:06:39<12:11:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5268/12318 [9:06:40<12:11:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5268/12318 [9:06:40<12:11:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5269/12318 [9:06:43<12:11:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5269/12318 [9:06:43<12:11:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5270/12318 [9:06:46<12:11:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5270/12318 [9:06:46<12:11:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5271/12318 [9:06:55<12:11:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5271/12318 [9:06:55<12:11:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5272/12318 [9:06:58<12:11:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5272/12318 [9:06:58<12:11:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5273/12318 [9:07:03<12:10:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5273/12318 [9:07:03<12:10:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5274/12318 [9:07:09<12:10:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5274/12318 [9:07:09<12:10:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5275/12318 [9:07:16<12:10:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5275/12318 [9:07:16<12:10:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5276/12318 [9:07:23<12:10:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5276/12318 [9:07:23<12:10:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5277/12318 [9:07:28<12:10:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5277/12318 [9:07:28<12:10:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5278/12318 [9:07:36<12:10:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5278/12318 [9:07:36<12:10:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5279/12318 [9:07:41<12:10:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5279/12318 [9:07:41<12:10:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5280/12318 [9:08:25<12:11:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5280/12318 [9:08:25<12:11:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5281/12318 [9:08:33<12:10:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5281/12318 [9:08:33<12:10:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5282/12318 [9:08:36<12:10:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5282/12318 [9:08:36<12:10:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5283/12318 [9:08:40<12:10:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5283/12318 [9:08:40<12:10:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5284/12318 [9:08:45<12:10:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5284/12318 [9:08:45<12:10:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5285/12318 [9:08:48<12:10:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5285/12318 [9:08:48<12:10:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5286/12318 [9:08:53<12:10:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5286/12318 [9:08:53<12:10:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5287/12318 [9:08:57<12:10:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5287/12318 [9:08:57<12:10:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5288/12318 [9:09:00<12:09:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5288/12318 [9:09:00<12:09:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5289/12318 [9:09:01<12:09:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5289/12318 [9:09:01<12:09:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5290/12318 [9:09:07<12:09:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5290/12318 [9:09:07<12:09:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5291/12318 [9:09:15<12:09:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5291/12318 [9:09:15<12:09:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5292/12318 [9:09:20<12:09:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5292/12318 [9:09:20<12:09:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5293/12318 [9:09:23<12:09:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5293/12318 [9:09:23<12:09:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5294/12318 [9:09:29<12:09:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5294/12318 [9:09:29<12:09:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5295/12318 [9:09:31<12:08:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5295/12318 [9:09:31<12:08:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5296/12318 [9:09:40<12:08:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5296/12318 [9:09:40<12:08:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5297/12318 [9:09:44<12:08:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5297/12318 [9:09:44<12:08:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5298/12318 [9:09:46<12:08:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5298/12318 [9:09:46<12:08:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5299/12318 [9:09:52<12:08:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5299/12318 [9:09:52<12:08:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5300/12318 [9:09:58<12:08:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5300/12318 [9:09:58<12:08:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5301/12318 [9:10:05<12:08:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5301/12318 [9:10:05<12:08:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5302/12318 [9:10:11<12:08:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5302/12318 [9:10:11<12:08:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5303/12318 [9:10:16<12:07:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5303/12318 [9:10:16<12:07:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5304/12318 [9:10:20<12:07:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5304/12318 [9:10:20<12:07:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5305/12318 [9:10:23<12:07:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5305/12318 [9:10:23<12:07:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5306/12318 [9:10:32<12:07:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5306/12318 [9:10:32<12:07:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5307/12318 [9:10:35<12:07:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5307/12318 [9:10:35<12:07:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5308/12318 [9:10:43<12:07:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5308/12318 [9:10:43<12:07:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5309/12318 [9:10:49<12:07:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5309/12318 [9:10:49<12:07:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5310/12318 [9:10:54<12:07:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5310/12318 [9:10:54<12:07:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5311/12318 [9:11:03<12:07:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5311/12318 [9:11:03<12:07:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5312/12318 [9:11:33<12:07:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5312/12318 [9:11:33<12:07:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5313/12318 [9:11:41<12:07:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5313/12318 [9:11:41<12:07:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5314/12318 [9:11:43<12:07:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5314/12318 [9:11:43<12:07:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5315/12318 [9:11:50<12:07:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5315/12318 [9:11:50<12:07:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5316/12318 [9:11:58<12:07:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5316/12318 [9:11:58<12:07:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5317/12318 [9:12:06<12:06:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5317/12318 [9:12:06<12:06:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5318/12318 [9:12:13<12:06:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5318/12318 [9:12:13<12:06:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5319/12318 [9:12:17<12:06:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5319/12318 [9:12:17<12:06:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5320/12318 [9:12:22<12:06:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5320/12318 [9:12:22<12:06:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5321/12318 [9:12:28<12:06:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5321/12318 [9:12:28<12:06:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5322/12318 [9:12:33<12:06:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5322/12318 [9:12:33<12:06:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5323/12318 [9:12:36<12:06:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5323/12318 [9:12:36<12:06:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5324/12318 [9:12:41<12:06:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5324/12318 [9:12:41<12:06:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5325/12318 [9:12:48<12:05:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5325/12318 [9:12:48<12:05:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5326/12318 [9:12:52<12:05:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5326/12318 [9:12:52<12:05:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5327/12318 [9:12:56<12:05:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5327/12318 [9:12:56<12:05:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5328/12318 [9:13:04<12:05:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5328/12318 [9:13:04<12:05:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5329/12318 [9:13:08<12:05:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5329/12318 [9:13:08<12:05:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5330/12318 [9:13:13<12:05:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5330/12318 [9:13:13<12:05:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5331/12318 [9:13:18<12:05:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5331/12318 [9:13:18<12:05:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5332/12318 [9:13:27<12:05:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5332/12318 [9:13:27<12:05:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5333/12318 [9:13:31<12:04:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5333/12318 [9:13:31<12:04:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5334/12318 [9:13:40<12:04:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5334/12318 [9:13:40<12:04:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5335/12318 [9:13:43<12:04:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5335/12318 [9:13:43<12:04:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5336/12318 [9:13:44<12:04:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5336/12318 [9:13:44<12:04:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5337/12318 [9:13:46<12:04:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5337/12318 [9:13:46<12:04:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5338/12318 [9:13:50<12:04:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5338/12318 [9:13:50<12:04:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5339/12318 [9:13:55<12:04:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5339/12318 [9:13:55<12:04:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5340/12318 [9:13:57<12:03:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5340/12318 [9:13:57<12:03:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5341/12318 [9:14:02<12:03:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5341/12318 [9:14:02<12:03:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5342/12318 [9:14:10<12:03:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5342/12318 [9:14:10<12:03:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5343/12318 [9:14:14<12:03:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5343/12318 [9:14:14<12:03:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5344/12318 [9:15:19<12:04:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5344/12318 [9:15:20<12:04:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5345/12318 [9:15:24<12:04:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5345/12318 [9:15:24<12:04:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5346/12318 [9:15:25<12:04:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5346/12318 [9:15:25<12:04:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5347/12318 [9:15:33<12:04:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5347/12318 [9:15:33<12:04:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5348/12318 [9:15:42<12:04:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5348/12318 [9:15:42<12:04:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5349/12318 [9:15:50<12:04:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5349/12318 [9:15:50<12:04:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5350/12318 [9:15:53<12:04:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5350/12318 [9:15:53<12:04:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5351/12318 [9:15:57<12:03:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5351/12318 [9:15:57<12:03:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5352/12318 [9:16:01<12:03:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5352/12318 [9:16:01<12:03:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5353/12318 [9:16:06<12:03:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5353/12318 [9:16:06<12:03:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5354/12318 [9:16:14<12:03:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5354/12318 [9:16:14<12:03:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5355/12318 [9:16:17<12:03:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5355/12318 [9:16:17<12:03:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5356/12318 [9:16:23<12:03:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5356/12318 [9:16:23<12:03:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5357/12318 [9:16:32<12:03:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5357/12318 [9:16:32<12:03:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  43%|▍| 5358/12318 [9:16:38<12:03:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  43%|▍| 5358/12318 [9:16:38<12:03:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5359/12318 [9:16:39<12:02:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5359/12318 [9:16:39<12:02:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5360/12318 [9:16:45<12:02:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5360/12318 [9:16:45<12:02:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5361/12318 [9:16:49<12:02:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5361/12318 [9:16:49<12:02:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5362/12318 [9:16:52<12:02:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5362/12318 [9:16:52<12:02:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5363/12318 [9:16:55<12:02:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5363/12318 [9:16:55<12:02:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5364/12318 [9:17:02<12:02:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5364/12318 [9:17:02<12:02:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5365/12318 [9:17:03<12:01:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5365/12318 [9:17:03<12:01:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5366/12318 [9:17:07<12:01:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5366/12318 [9:17:07<12:01:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5367/12318 [9:17:08<12:01:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5367/12318 [9:17:08<12:01:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5368/12318 [9:17:15<12:01:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5368/12318 [9:17:15<12:01:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5369/12318 [9:17:20<12:01:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5369/12318 [9:17:20<12:01:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5370/12318 [9:17:26<12:01:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5370/12318 [9:17:26<12:01:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5371/12318 [9:17:29<12:01:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5371/12318 [9:17:29<12:01:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5372/12318 [9:17:35<12:00:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5372/12318 [9:17:35<12:00:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5373/12318 [9:17:40<12:00:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5373/12318 [9:17:40<12:00:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5374/12318 [9:17:49<12:00:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5374/12318 [9:17:49<12:00:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5375/12318 [9:17:52<12:00:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5375/12318 [9:17:52<12:00:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5376/12318 [9:18:46<12:01:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5376/12318 [9:18:46<12:01:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5377/12318 [9:18:50<12:01:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5377/12318 [9:18:50<12:01:24,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5378/12318 [9:18:55<12:01:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5378/12318 [9:18:55<12:01:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5379/12318 [9:19:03<12:01:11,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5379/12318 [9:19:03<12:01:11,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5380/12318 [9:19:09<12:01:04,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5380/12318 [9:19:09<12:01:04,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5381/12318 [9:19:11<12:00:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5381/12318 [9:19:11<12:00:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5382/12318 [9:19:14<12:00:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5382/12318 [9:19:14<12:00:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5383/12318 [9:19:17<12:00:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5383/12318 [9:19:17<12:00:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5384/12318 [9:19:21<12:00:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5384/12318 [9:19:21<12:00:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5385/12318 [9:19:30<12:00:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5385/12318 [9:19:30<12:00:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5386/12318 [9:19:35<12:00:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5386/12318 [9:19:35<12:00:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5387/12318 [9:19:39<12:00:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5387/12318 [9:19:39<12:00:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5388/12318 [9:19:41<11:59:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5388/12318 [9:19:41<11:59:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5389/12318 [9:19:45<11:59:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5389/12318 [9:19:45<11:59:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5390/12318 [9:19:51<11:59:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5390/12318 [9:19:51<11:59:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5391/12318 [9:19:55<11:59:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5391/12318 [9:19:55<11:59:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5392/12318 [9:19:58<11:59:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5392/12318 [9:19:58<11:59:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5393/12318 [9:20:04<11:59:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5393/12318 [9:20:04<11:59:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5394/12318 [9:20:11<11:59:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5394/12318 [9:20:11<11:59:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5395/12318 [9:20:16<11:58:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5395/12318 [9:20:16<11:58:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5396/12318 [9:20:23<11:58:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5396/12318 [9:20:23<11:58:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5397/12318 [9:20:30<11:58:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5397/12318 [9:20:30<11:58:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5398/12318 [9:20:34<11:58:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5398/12318 [9:20:34<11:58:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5399/12318 [9:20:38<11:58:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5399/12318 [9:20:38<11:58:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5400/12318 [9:20:41<11:58:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5400/12318 [9:20:41<11:58:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5401/12318 [9:20:44<11:58:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5401/12318 [9:20:44<11:58:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5402/12318 [9:20:53<11:58:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5402/12318 [9:20:53<11:58:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5403/12318 [9:21:01<11:58:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5403/12318 [9:21:01<11:58:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5404/12318 [9:21:03<11:57:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5404/12318 [9:21:03<11:57:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5405/12318 [9:21:08<11:57:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5405/12318 [9:21:08<11:57:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5406/12318 [9:21:13<11:57:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5406/12318 [9:21:13<11:57:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5407/12318 [9:21:14<11:57:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5407/12318 [9:21:14<11:57:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5408/12318 [9:22:00<11:58:06,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5408/12318 [9:22:00<11:58:06,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5409/12318 [9:22:03<11:57:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5409/12318 [9:22:03<11:57:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5410/12318 [9:22:08<11:57:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5410/12318 [9:22:08<11:57:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5411/12318 [9:22:13<11:57:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5411/12318 [9:22:13<11:57:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5412/12318 [9:22:20<11:57:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5412/12318 [9:22:20<11:57:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5413/12318 [9:22:28<11:57:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5413/12318 [9:22:28<11:57:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5414/12318 [9:22:30<11:57:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5414/12318 [9:22:30<11:57:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5415/12318 [9:22:35<11:57:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5415/12318 [9:22:35<11:57:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5416/12318 [9:22:37<11:56:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5416/12318 [9:22:37<11:56:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5417/12318 [9:22:43<11:56:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5417/12318 [9:22:43<11:56:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5418/12318 [9:22:44<11:56:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5418/12318 [9:22:44<11:56:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5419/12318 [9:22:52<11:56:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5419/12318 [9:22:52<11:56:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5420/12318 [9:22:56<11:56:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5420/12318 [9:22:56<11:56:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5421/12318 [9:23:04<11:56:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5421/12318 [9:23:04<11:56:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5422/12318 [9:23:09<11:56:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5422/12318 [9:23:09<11:56:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5423/12318 [9:23:10<11:56:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5423/12318 [9:23:10<11:56:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5424/12318 [9:23:19<11:55:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5424/12318 [9:23:19<11:55:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5425/12318 [9:23:20<11:55:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5425/12318 [9:23:20<11:55:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5426/12318 [9:23:28<11:55:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5426/12318 [9:23:28<11:55:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5427/12318 [9:23:31<11:55:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5427/12318 [9:23:31<11:55:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5428/12318 [9:23:33<11:55:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5428/12318 [9:23:33<11:55:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5429/12318 [9:23:35<11:55:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5429/12318 [9:23:35<11:55:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5430/12318 [9:23:41<11:55:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5430/12318 [9:23:41<11:55:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5431/12318 [9:23:44<11:54:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5431/12318 [9:23:44<11:54:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5432/12318 [9:23:50<11:54:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5432/12318 [9:23:50<11:54:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5433/12318 [9:23:57<11:54:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5433/12318 [9:23:57<11:54:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5434/12318 [9:24:02<11:54:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5434/12318 [9:24:02<11:54:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5435/12318 [9:24:10<11:54:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5435/12318 [9:24:10<11:54:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5436/12318 [9:24:15<11:54:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5436/12318 [9:24:15<11:54:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5437/12318 [9:24:18<11:54:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5437/12318 [9:24:18<11:54:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5438/12318 [9:24:21<11:54:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5438/12318 [9:24:21<11:54:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5439/12318 [9:24:29<11:53:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5439/12318 [9:24:29<11:53:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5440/12318 [9:25:16<11:54:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5440/12318 [9:25:16<11:54:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5441/12318 [9:25:21<11:54:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5441/12318 [9:25:21<11:54:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5442/12318 [9:25:29<11:54:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5442/12318 [9:25:29<11:54:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5443/12318 [9:25:33<11:54:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5443/12318 [9:25:33<11:54:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5444/12318 [9:25:40<11:54:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5444/12318 [9:25:40<11:54:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5445/12318 [9:25:47<11:54:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5445/12318 [9:25:47<11:54:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5446/12318 [9:25:50<11:54:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5446/12318 [9:25:50<11:54:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5447/12318 [9:25:52<11:53:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5447/12318 [9:25:52<11:53:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5448/12318 [9:26:00<11:53:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5448/12318 [9:26:00<11:53:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5449/12318 [9:26:03<11:53:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5449/12318 [9:26:03<11:53:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5450/12318 [9:26:07<11:53:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5450/12318 [9:26:07<11:53:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5451/12318 [9:26:13<11:53:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5451/12318 [9:26:13<11:53:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5452/12318 [9:26:22<11:53:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5452/12318 [9:26:22<11:53:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5453/12318 [9:26:29<11:53:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5453/12318 [9:26:29<11:53:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5454/12318 [9:26:38<11:53:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5454/12318 [9:26:38<11:53:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5455/12318 [9:26:41<11:52:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5455/12318 [9:26:41<11:52:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5456/12318 [9:26:46<11:52:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5456/12318 [9:26:46<11:52:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5457/12318 [9:26:51<11:52:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5457/12318 [9:26:51<11:52:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5458/12318 [9:26:55<11:52:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5458/12318 [9:26:55<11:52:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5459/12318 [9:27:00<11:52:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5459/12318 [9:27:00<11:52:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5460/12318 [9:27:04<11:52:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5460/12318 [9:27:04<11:52:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5461/12318 [9:27:07<11:52:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5461/12318 [9:27:07<11:52:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5462/12318 [9:27:16<11:52:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5462/12318 [9:27:16<11:52:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5463/12318 [9:27:21<11:51:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5463/12318 [9:27:21<11:51:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5464/12318 [9:27:27<11:51:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5464/12318 [9:27:27<11:51:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5465/12318 [9:27:34<11:51:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5465/12318 [9:27:34<11:51:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5466/12318 [9:27:37<11:51:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5466/12318 [9:27:37<11:51:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5467/12318 [9:27:45<11:51:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5467/12318 [9:27:45<11:51:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5468/12318 [9:27:48<11:51:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5468/12318 [9:27:48<11:51:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5469/12318 [9:27:52<11:51:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5469/12318 [9:27:52<11:51:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5470/12318 [9:27:53<11:50:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5470/12318 [9:27:53<11:50:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5471/12318 [9:27:56<11:50:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5471/12318 [9:27:56<11:50:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5472/12318 [9:28:41<11:51:29,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5472/12318 [9:28:41<11:51:29,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5473/12318 [9:28:45<11:51:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5473/12318 [9:28:45<11:51:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5474/12318 [9:28:52<11:51:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5474/12318 [9:28:52<11:51:14,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5475/12318 [9:28:55<11:51:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5475/12318 [9:28:55<11:51:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5476/12318 [9:28:57<11:50:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5476/12318 [9:28:57<11:50:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5477/12318 [9:28:59<11:50:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5477/12318 [9:28:59<11:50:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5478/12318 [9:29:04<11:50:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5478/12318 [9:29:04<11:50:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5479/12318 [9:29:09<11:50:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5479/12318 [9:29:09<11:50:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5480/12318 [9:29:18<11:50:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5480/12318 [9:29:18<11:50:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  44%|▍| 5481/12318 [9:29:27<11:50:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  44%|▍| 5481/12318 [9:29:27<11:50:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5482/12318 [9:29:32<11:50:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5482/12318 [9:29:32<11:50:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5483/12318 [9:29:37<11:50:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5483/12318 [9:29:37<11:50:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5484/12318 [9:29:41<11:49:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5484/12318 [9:29:41<11:49:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5485/12318 [9:29:43<11:49:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5485/12318 [9:29:43<11:49:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5486/12318 [9:29:49<11:49:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5486/12318 [9:29:49<11:49:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5487/12318 [9:29:53<11:49:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5487/12318 [9:29:53<11:49:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5488/12318 [9:30:01<11:49:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5488/12318 [9:30:01<11:49:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5489/12318 [9:30:09<11:49:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5489/12318 [9:30:09<11:49:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5490/12318 [9:30:17<11:49:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5490/12318 [9:30:17<11:49:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5491/12318 [9:30:21<11:49:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5491/12318 [9:30:21<11:49:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5492/12318 [9:30:24<11:48:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5492/12318 [9:30:24<11:48:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5493/12318 [9:30:28<11:48:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5493/12318 [9:30:28<11:48:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5494/12318 [9:30:32<11:48:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5494/12318 [9:30:32<11:48:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5495/12318 [9:30:33<11:48:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5495/12318 [9:30:33<11:48:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5496/12318 [9:30:41<11:48:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5496/12318 [9:30:41<11:48:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5497/12318 [9:30:43<11:48:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5497/12318 [9:30:43<11:48:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5498/12318 [9:30:48<11:48:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5498/12318 [9:30:48<11:48:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5499/12318 [9:30:51<11:47:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5499/12318 [9:30:51<11:47:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5500/12318 [9:30:59<11:47:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5500/12318 [9:30:59<11:47:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5501/12318 [9:31:07<11:47:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5501/12318 [9:31:07<11:47:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5502/12318 [9:31:09<11:47:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5502/12318 [9:31:09<11:47:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5503/12318 [9:31:12<11:47:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5503/12318 [9:31:12<11:47:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5504/12318 [9:31:49<11:47:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5504/12318 [9:31:49<11:47:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5505/12318 [9:31:57<11:47:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5505/12318 [9:31:57<11:47:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5506/12318 [9:32:03<11:47:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5506/12318 [9:32:03<11:47:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5507/12318 [9:32:09<11:47:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5507/12318 [9:32:09<11:47:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5508/12318 [9:32:16<11:47:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5508/12318 [9:32:16<11:47:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5509/12318 [9:32:21<11:47:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5509/12318 [9:32:21<11:47:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5510/12318 [9:32:30<11:47:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5510/12318 [9:32:30<11:47:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5511/12318 [9:32:37<11:47:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5511/12318 [9:32:37<11:47:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5512/12318 [9:32:42<11:47:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5512/12318 [9:32:42<11:47:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5513/12318 [9:32:50<11:47:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5513/12318 [9:32:50<11:47:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5514/12318 [9:32:58<11:47:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5514/12318 [9:32:58<11:47:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5515/12318 [9:33:02<11:46:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5515/12318 [9:33:02<11:46:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5516/12318 [9:33:04<11:46:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5516/12318 [9:33:04<11:46:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5517/12318 [9:33:13<11:46:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5517/12318 [9:33:13<11:46:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5518/12318 [9:33:22<11:46:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5518/12318 [9:33:22<11:46:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5519/12318 [9:33:28<11:46:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5519/12318 [9:33:28<11:46:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5520/12318 [9:33:34<11:46:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5520/12318 [9:33:34<11:46:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5521/12318 [9:33:39<11:46:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5521/12318 [9:33:39<11:46:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5522/12318 [9:33:44<11:46:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5522/12318 [9:33:44<11:46:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5523/12318 [9:33:45<11:45:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5523/12318 [9:33:45<11:45:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5524/12318 [9:33:46<11:45:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5524/12318 [9:33:46<11:45:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5525/12318 [9:33:53<11:45:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5525/12318 [9:33:53<11:45:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5526/12318 [9:34:02<11:45:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5526/12318 [9:34:02<11:45:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5527/12318 [9:34:04<11:45:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5527/12318 [9:34:04<11:45:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5528/12318 [9:34:12<11:45:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5528/12318 [9:34:12<11:45:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5529/12318 [9:34:13<11:45:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5529/12318 [9:34:13<11:45:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5530/12318 [9:34:21<11:45:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5530/12318 [9:34:21<11:45:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5531/12318 [9:34:23<11:44:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5531/12318 [9:34:23<11:44:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5532/12318 [9:34:30<11:44:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5532/12318 [9:34:30<11:44:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5533/12318 [9:34:33<11:44:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5533/12318 [9:34:33<11:44:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5534/12318 [9:34:39<11:44:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5534/12318 [9:34:39<11:44:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5535/12318 [9:34:42<11:44:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5535/12318 [9:34:42<11:44:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5536/12318 [9:35:13<11:44:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5536/12318 [9:35:13<11:44:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5537/12318 [9:35:15<11:44:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5537/12318 [9:35:15<11:44:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5538/12318 [9:35:23<11:44:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5538/12318 [9:35:23<11:44:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|��| 5539/12318 [9:35:31<11:44:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5539/12318 [9:35:31<11:44:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5540/12318 [9:35:34<11:44:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5540/12318 [9:35:34<11:44:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5541/12318 [9:35:43<11:44:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5541/12318 [9:35:43<11:44:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5542/12318 [9:35:46<11:43:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5542/12318 [9:35:46<11:43:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5543/12318 [9:35:51<11:43:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5543/12318 [9:35:51<11:43:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5544/12318 [9:35:53<11:43:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5544/12318 [9:35:53<11:43:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5545/12318 [9:35:58<11:43:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5545/12318 [9:35:58<11:43:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5546/12318 [9:36:03<11:43:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5546/12318 [9:36:03<11:43:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5547/12318 [9:36:12<11:43:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5547/12318 [9:36:12<11:43:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5548/12318 [9:36:13<11:43:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5548/12318 [9:36:13<11:43:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5549/12318 [9:36:20<11:43:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5549/12318 [9:36:20<11:43:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5550/12318 [9:36:28<11:42:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5550/12318 [9:36:28<11:42:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5551/12318 [9:36:35<11:42:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5551/12318 [9:36:35<11:42:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5552/12318 [9:36:38<11:42:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5552/12318 [9:36:38<11:42:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5553/12318 [9:36:43<11:42:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5553/12318 [9:36:43<11:42:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5554/12318 [9:36:50<11:42:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5554/12318 [9:36:50<11:42:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5555/12318 [9:36:54<11:42:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5555/12318 [9:36:54<11:42:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5556/12318 [9:37:03<11:42:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5556/12318 [9:37:03<11:42:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5557/12318 [9:37:12<11:42:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5557/12318 [9:37:12<11:42:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5558/12318 [9:37:19<11:42:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5558/12318 [9:37:19<11:42:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5559/12318 [9:37:23<11:42:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5559/12318 [9:37:23<11:42:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5560/12318 [9:37:27<11:41:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5560/12318 [9:37:27<11:41:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5561/12318 [9:37:32<11:41:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5561/12318 [9:37:32<11:41:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5562/12318 [9:37:37<11:41:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5562/12318 [9:37:37<11:41:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5563/12318 [9:37:39<11:41:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5563/12318 [9:37:39<11:41:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5564/12318 [9:37:42<11:41:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5564/12318 [9:37:42<11:41:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5565/12318 [9:37:46<11:41:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5565/12318 [9:37:46<11:41:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5566/12318 [9:37:50<11:40:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5566/12318 [9:37:50<11:40:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5567/12318 [9:37:57<11:40:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5567/12318 [9:37:57<11:40:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5568/12318 [9:38:22<11:41:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5568/12318 [9:38:22<11:41:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5569/12318 [9:38:28<11:41:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5569/12318 [9:38:28<11:41:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5570/12318 [9:38:33<11:40:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5570/12318 [9:38:33<11:40:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5571/12318 [9:38:39<11:40:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5571/12318 [9:38:39<11:40:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5572/12318 [9:38:47<11:40:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5572/12318 [9:38:47<11:40:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5573/12318 [9:38:52<11:40:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5573/12318 [9:38:52<11:40:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5574/12318 [9:38:57<11:40:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5574/12318 [9:38:57<11:40:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5575/12318 [9:38:59<11:40:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5575/12318 [9:38:59<11:40:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5576/12318 [9:39:04<11:40:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5576/12318 [9:39:04<11:40:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5577/12318 [9:39:05<11:39:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5577/12318 [9:39:05<11:39:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5578/12318 [9:39:08<11:39:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5578/12318 [9:39:08<11:39:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5579/12318 [9:39:15<11:39:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5579/12318 [9:39:15<11:39:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5580/12318 [9:39:22<11:39:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5580/12318 [9:39:22<11:39:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5581/12318 [9:39:27<11:39:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5581/12318 [9:39:27<11:39:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5582/12318 [9:39:34<11:39:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5582/12318 [9:39:34<11:39:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5583/12318 [9:39:42<11:39:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5583/12318 [9:39:42<11:39:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5584/12318 [9:39:46<11:39:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5584/12318 [9:39:46<11:39:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5585/12318 [9:39:50<11:39:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5585/12318 [9:39:50<11:39:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5586/12318 [9:39:56<11:38:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5586/12318 [9:39:56<11:38:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5587/12318 [9:40:01<11:38:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5587/12318 [9:40:01<11:38:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5588/12318 [9:40:09<11:38:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5588/12318 [9:40:09<11:38:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5589/12318 [9:40:17<11:38:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5589/12318 [9:40:17<11:38:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5590/12318 [9:40:20<11:38:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5590/12318 [9:40:20<11:38:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5591/12318 [9:40:25<11:38:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5591/12318 [9:40:25<11:38:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5592/12318 [9:40:32<11:38:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5592/12318 [9:40:32<11:38:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5593/12318 [9:40:39<11:38:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5593/12318 [9:40:39<11:38:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5594/12318 [9:40:47<11:38:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5594/12318 [9:40:47<11:38:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5595/12318 [9:40:55<11:38:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5595/12318 [9:40:55<11:38:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5596/12318 [9:40:59<11:37:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5596/12318 [9:40:59<11:37:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5597/12318 [9:41:00<11:37:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5597/12318 [9:41:00<11:37:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5598/12318 [9:41:05<11:37:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5598/12318 [9:41:05<11:37:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5599/12318 [9:41:13<11:37:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5599/12318 [9:41:13<11:37:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5600/12318 [9:41:48<11:37:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5600/12318 [9:41:48<11:37:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5601/12318 [9:42:13<11:38:13,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5601/12318 [9:42:13<11:38:13,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5602/12318 [9:42:21<11:38:10,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5602/12318 [9:42:21<11:38:10,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5603/12318 [9:42:23<11:37:58,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5603/12318 [9:42:23<11:37:58,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  45%|▍| 5604/12318 [9:42:30<11:37:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  45%|▍| 5604/12318 [9:42:30<11:37:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5605/12318 [9:42:39<11:37:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5605/12318 [9:42:39<11:37:50,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5606/12318 [9:42:43<11:37:40,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5606/12318 [9:42:43<11:37:40,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5607/12318 [9:42:48<11:37:33,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5607/12318 [9:42:48<11:37:33,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5608/12318 [9:42:56<11:37:29,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5608/12318 [9:42:56<11:37:29,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5609/12318 [9:42:57<11:37:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5609/12318 [9:42:57<11:37:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5610/12318 [9:43:01<11:37:08,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5610/12318 [9:43:01<11:37:08,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5611/12318 [9:43:08<11:37:02,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5611/12318 [9:43:08<11:37:02,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5612/12318 [9:43:12<11:36:54,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5612/12318 [9:43:12<11:36:54,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5613/12318 [9:43:17<11:36:46,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5613/12318 [9:43:17<11:36:46,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5614/12318 [9:43:22<11:36:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5614/12318 [9:43:22<11:36:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5615/12318 [9:43:29<11:36:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5615/12318 [9:43:29<11:36:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5616/12318 [9:43:31<11:36:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5616/12318 [9:43:31<11:36:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5617/12318 [9:43:38<11:36:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5617/12318 [9:43:38<11:36:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5618/12318 [9:43:41<11:36:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5618/12318 [9:43:41<11:36:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5619/12318 [9:43:43<11:35:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5619/12318 [9:43:43<11:35:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5620/12318 [9:43:47<11:35:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5620/12318 [9:43:47<11:35:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5621/12318 [9:43:49<11:35:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5621/12318 [9:43:49<11:35:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5622/12318 [9:43:50<11:35:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5622/12318 [9:43:50<11:35:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5623/12318 [9:43:57<11:35:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5623/12318 [9:43:57<11:35:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5624/12318 [9:44:04<11:35:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5624/12318 [9:44:04<11:35:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5625/12318 [9:44:07<11:35:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5625/12318 [9:44:07<11:35:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5626/12318 [9:44:11<11:34:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5626/12318 [9:44:11<11:34:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5627/12318 [9:44:15<11:34:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5627/12318 [9:44:15<11:34:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5628/12318 [9:44:20<11:34:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5628/12318 [9:44:20<11:34:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5629/12318 [9:44:22<11:34:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5629/12318 [9:44:22<11:34:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5630/12318 [9:44:26<11:34:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5630/12318 [9:44:26<11:34:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5631/12318 [9:44:35<11:34:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5631/12318 [9:44:35<11:34:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5632/12318 [9:45:15<11:34:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5632/12318 [9:45:15<11:34:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5633/12318 [9:45:19<11:34:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5633/12318 [9:45:19<11:34:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5634/12318 [9:45:22<11:34:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5634/12318 [9:45:22<11:34:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5635/12318 [9:45:26<11:34:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5635/12318 [9:45:26<11:34:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5636/12318 [9:45:31<11:34:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5636/12318 [9:45:31<11:34:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5637/12318 [9:45:38<11:34:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5637/12318 [9:45:38<11:34:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5638/12318 [9:45:46<11:34:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5638/12318 [9:45:46<11:34:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5639/12318 [9:45:52<11:33:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5639/12318 [9:45:52<11:33:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5640/12318 [9:45:55<11:33:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5640/12318 [9:45:55<11:33:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5641/12318 [9:46:01<11:33:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5641/12318 [9:46:01<11:33:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5642/12318 [9:46:08<11:33:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5642/12318 [9:46:08<11:33:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5643/12318 [9:46:13<11:33:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5643/12318 [9:46:13<11:33:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5644/12318 [9:46:15<11:33:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5644/12318 [9:46:15<11:33:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5645/12318 [9:46:20<11:33:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5645/12318 [9:46:20<11:33:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5646/12318 [9:46:23<11:32:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5646/12318 [9:46:23<11:32:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5647/12318 [9:46:28<11:32:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5647/12318 [9:46:28<11:32:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5648/12318 [9:46:34<11:32:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5648/12318 [9:46:34<11:32:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5649/12318 [9:46:40<11:32:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5649/12318 [9:46:40<11:32:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5650/12318 [9:46:44<11:32:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5650/12318 [9:46:44<11:32:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5651/12318 [9:46:51<11:32:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5651/12318 [9:46:51<11:32:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5652/12318 [9:46:56<11:32:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5652/12318 [9:46:56<11:32:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5653/12318 [9:47:03<11:32:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5653/12318 [9:47:03<11:32:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5654/12318 [9:47:06<11:31:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5654/12318 [9:47:06<11:31:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5655/12318 [9:47:14<11:31:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5655/12318 [9:47:14<11:31:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5656/12318 [9:47:17<11:31:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5656/12318 [9:47:17<11:31:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5657/12318 [9:47:21<11:31:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5657/12318 [9:47:21<11:31:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5658/12318 [9:47:25<11:31:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5658/12318 [9:47:25<11:31:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5659/12318 [9:47:32<11:31:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5659/12318 [9:47:32<11:31:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5660/12318 [9:47:34<11:31:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5660/12318 [9:47:34<11:31:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5661/12318 [9:47:40<11:31:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5661/12318 [9:47:40<11:31:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5662/12318 [9:47:46<11:30:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5662/12318 [9:47:46<11:30:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5663/12318 [9:47:47<11:30:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5663/12318 [9:47:47<11:30:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5664/12318 [9:48:41<11:31:35,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5664/12318 [9:48:41<11:31:35,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5665/12318 [9:48:46<11:31:28,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5665/12318 [9:48:46<11:31:28,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5666/12318 [9:48:52<11:31:21,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5666/12318 [9:48:52<11:31:21,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5667/12318 [9:48:56<11:31:12,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5667/12318 [9:48:56<11:31:12,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5668/12318 [9:49:01<11:31:03,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5668/12318 [9:49:01<11:31:03,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5669/12318 [9:49:06<11:30:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5669/12318 [9:49:06<11:30:56,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5670/12318 [9:49:14<11:30:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5670/12318 [9:49:14<11:30:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5671/12318 [9:49:18<11:30:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5671/12318 [9:49:18<11:30:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5672/12318 [9:49:21<11:30:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5672/12318 [9:49:21<11:30:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5673/12318 [9:49:27<11:30:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5673/12318 [9:49:27<11:30:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5674/12318 [9:49:31<11:30:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5674/12318 [9:49:31<11:30:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5675/12318 [9:49:37<11:30:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5675/12318 [9:49:37<11:30:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5676/12318 [9:49:41<11:30:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5676/12318 [9:49:41<11:30:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5677/12318 [9:49:43<11:29:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5677/12318 [9:49:43<11:29:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5678/12318 [9:49:51<11:29:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5678/12318 [9:49:51<11:29:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5679/12318 [9:49:57<11:29:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5679/12318 [9:49:57<11:29:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5680/12318 [9:50:06<11:29:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5680/12318 [9:50:06<11:29:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5681/12318 [9:50:09<11:29:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5681/12318 [9:50:09<11:29:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5682/12318 [9:50:17<11:29:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5682/12318 [9:50:17<11:29:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5683/12318 [9:50:26<11:29:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5683/12318 [9:50:26<11:29:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5684/12318 [9:50:32<11:29:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5684/12318 [9:50:32<11:29:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5685/12318 [9:50:40<11:29:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5685/12318 [9:50:40<11:29:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5686/12318 [9:50:43<11:29:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5686/12318 [9:50:43<11:29:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5687/12318 [9:50:50<11:28:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5687/12318 [9:50:50<11:28:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5688/12318 [9:50:57<11:28:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5688/12318 [9:50:57<11:28:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5689/12318 [9:50:58<11:28:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5689/12318 [9:50:58<11:28:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5690/12318 [9:51:03<11:28:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5690/12318 [9:51:03<11:28:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5691/12318 [9:51:11<11:28:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5691/12318 [9:51:11<11:28:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5692/12318 [9:51:15<11:28:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5692/12318 [9:51:15<11:28:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5693/12318 [9:51:21<11:28:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5693/12318 [9:51:21<11:28:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5694/12318 [9:51:24<11:28:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|���| 5694/12318 [9:51:24<11:28:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5695/12318 [9:51:26<11:27:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5695/12318 [9:51:26<11:27:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5696/12318 [9:52:01<11:28:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5696/12318 [9:52:01<11:28:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5697/12318 [9:52:08<11:28:10,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5697/12318 [9:52:08<11:28:10,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5698/12318 [9:52:13<11:28:03,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5698/12318 [9:52:13<11:28:03,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5699/12318 [9:52:21<11:27:58,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5699/12318 [9:52:21<11:27:58,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5700/12318 [9:52:24<11:27:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5700/12318 [9:52:24<11:27:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5701/12318 [9:52:29<11:27:41,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5701/12318 [9:52:29<11:27:41,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5702/12318 [9:52:34<11:27:33,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5702/12318 [9:52:34<11:27:33,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5703/12318 [9:52:43<11:27:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5703/12318 [9:52:43<11:27:30,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5704/12318 [9:52:46<11:27:21,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5704/12318 [9:52:46<11:27:21,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5705/12318 [9:52:51<11:27:13,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5705/12318 [9:52:51<11:27:13,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5706/12318 [9:52:53<11:27:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5706/12318 [9:52:53<11:27:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5707/12318 [9:52:56<11:26:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5707/12318 [9:52:56<11:26:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5708/12318 [9:53:04<11:26:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5708/12318 [9:53:04<11:26:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5709/12318 [9:53:12<11:26:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5709/12318 [9:53:12<11:26:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5710/12318 [9:53:18<11:26:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5710/12318 [9:53:18<11:26:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5711/12318 [9:53:23<11:26:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5711/12318 [9:53:23<11:26:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5712/12318 [9:53:28<11:26:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5712/12318 [9:53:28<11:26:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5713/12318 [9:53:35<11:26:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5713/12318 [9:53:35<11:26:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5714/12318 [9:53:43<11:26:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5714/12318 [9:53:43<11:26:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5715/12318 [9:53:49<11:26:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5715/12318 [9:53:49<11:26:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5716/12318 [9:53:54<11:25:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5716/12318 [9:53:54<11:25:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5717/12318 [9:53:59<11:25:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5717/12318 [9:53:59<11:25:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5718/12318 [9:54:02<11:25:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5718/12318 [9:54:02<11:25:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5719/12318 [9:54:10<11:25:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5719/12318 [9:54:10<11:25:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5720/12318 [9:54:13<11:25:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5720/12318 [9:54:13<11:25:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5721/12318 [9:54:16<11:25:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5721/12318 [9:54:16<11:25:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5722/12318 [9:54:22<11:25:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5722/12318 [9:54:22<11:25:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5723/12318 [9:54:26<11:25:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5723/12318 [9:54:26<11:25:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5724/12318 [9:54:31<11:24:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5724/12318 [9:54:31<11:24:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5725/12318 [9:54:37<11:24:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5725/12318 [9:54:37<11:24:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5726/12318 [9:54:40<11:24:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5726/12318 [9:54:40<11:24:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  46%|▍| 5727/12318 [9:54:46<11:24:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  46%|▍| 5727/12318 [9:54:46<11:24:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5728/12318 [9:55:15<11:24:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5728/12318 [9:55:15<11:24:50,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5729/12318 [9:55:22<11:24:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5729/12318 [9:55:22<11:24:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5730/12318 [9:55:24<11:24:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5730/12318 [9:55:24<11:24:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5731/12318 [9:55:31<11:24:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5731/12318 [9:55:31<11:24:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5732/12318 [9:55:35<11:24:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5732/12318 [9:55:35<11:24:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5733/12318 [9:55:40<11:24:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5733/12318 [9:55:40<11:24:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5734/12318 [9:55:45<11:24:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5734/12318 [9:55:45<11:24:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5735/12318 [9:55:47<11:23:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5735/12318 [9:55:47<11:23:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5736/12318 [9:55:54<11:23:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5736/12318 [9:55:54<11:23:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5737/12318 [9:55:55<11:23:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5737/12318 [9:55:55<11:23:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5738/12318 [9:56:03<11:23:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5738/12318 [9:56:03<11:23:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5739/12318 [9:56:10<11:23:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5739/12318 [9:56:10<11:23:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5740/12318 [9:56:16<11:23:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5740/12318 [9:56:16<11:23:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5741/12318 [9:56:18<11:23:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5741/12318 [9:56:18<11:23:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5742/12318 [9:56:23<11:23:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5742/12318 [9:56:23<11:23:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5743/12318 [9:56:30<11:22:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5743/12318 [9:56:30<11:22:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5744/12318 [9:56:39<11:22:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5744/12318 [9:56:39<11:22:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5745/12318 [9:56:41<11:22:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5745/12318 [9:56:41<11:22:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5746/12318 [9:56:48<11:22:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5746/12318 [9:56:48<11:22:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5747/12318 [9:56:51<11:22:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5747/12318 [9:56:51<11:22:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5748/12318 [9:56:57<11:22:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5748/12318 [9:56:57<11:22:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5749/12318 [9:57:01<11:22:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5749/12318 [9:57:01<11:22:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5750/12318 [9:57:09<11:22:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5750/12318 [9:57:09<11:22:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5751/12318 [9:57:12<11:21:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5751/12318 [9:57:12<11:21:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5752/12318 [9:57:14<11:21:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5752/12318 [9:57:14<11:21:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5753/12318 [9:57:16<11:21:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5753/12318 [9:57:16<11:21:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5754/12318 [9:57:18<11:21:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5754/12318 [9:57:18<11:21:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5755/12318 [9:57:23<11:21:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5755/12318 [9:57:23<11:21:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5756/12318 [9:57:32<11:21:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5756/12318 [9:57:32<11:21:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5757/12318 [9:57:39<11:21:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5757/12318 [9:57:39<11:21:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5758/12318 [9:57:42<11:20:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5758/12318 [9:57:42<11:20:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5759/12318 [9:57:47<11:20:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5759/12318 [9:57:47<11:20:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5760/12318 [9:58:31<11:21:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5760/12318 [9:58:31<11:21:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5761/12318 [9:58:35<11:21:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5761/12318 [9:58:35<11:21:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5762/12318 [9:58:43<11:21:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5762/12318 [9:58:43<11:21:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5763/12318 [9:58:48<11:21:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5763/12318 [9:58:48<11:21:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5764/12318 [9:58:52<11:20:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5764/12318 [9:58:52<11:20:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5765/12318 [9:58:57<11:20:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5765/12318 [9:58:57<11:20:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5766/12318 [9:59:03<11:20:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5766/12318 [9:59:03<11:20:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5767/12318 [9:59:05<11:20:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5767/12318 [9:59:05<11:20:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5768/12318 [9:59:11<11:20:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5768/12318 [9:59:11<11:20:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5769/12318 [9:59:15<11:20:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5769/12318 [9:59:15<11:20:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5770/12318 [9:59:21<11:20:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5770/12318 [9:59:21<11:20:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5771/12318 [9:59:29<11:20:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5771/12318 [9:59:29<11:20:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5772/12318 [9:59:34<11:19:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5772/12318 [9:59:34<11:19:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5773/12318 [9:59:35<11:19:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5773/12318 [9:59:35<11:19:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5774/12318 [9:59:41<11:19:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5774/12318 [9:59:41<11:19:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5775/12318 [9:59:47<11:19:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5775/12318 [9:59:47<11:19:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5776/12318 [9:59:53<11:19:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5776/12318 [9:59:53<11:19:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5777/12318 [9:59:55<11:19:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  47%|▍| 5777/12318 [9:59:55<11:19:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5778/12318 [10:00:00<11:19:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5778/12318 [10:00:00<11:19:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5779/12318 [10:00:05<11:19:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5779/12318 [10:00:05<11:19:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5780/12318 [10:00:10<11:18:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5780/12318 [10:00:10<11:18:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5781/12318 [10:00:16<11:18:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5781/12318 [10:00:16<11:18:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5782/12318 [10:00:23<11:18:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5782/12318 [10:00:23<11:18:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5783/12318 [10:00:32<11:18:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5783/12318 [10:00:32<11:18:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5784/12318 [10:00:37<11:18:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5784/12318 [10:00:37<11:18:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5785/12318 [10:00:44<11:18:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5785/12318 [10:00:44<11:18:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5786/12318 [10:00:52<11:18:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5786/12318 [10:00:52<11:18:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5787/12318 [10:00:54<11:18:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5787/12318 [10:00:54<11:18:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5788/12318 [10:01:03<11:18:06,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5788/12318 [10:01:03<11:18:06,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5789/12318 [10:01:11<11:18:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5789/12318 [10:01:11<11:18:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5790/12318 [10:01:16<11:17:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5790/12318 [10:01:16<11:17:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5791/12318 [10:01:23<11:17:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5791/12318 [10:01:23<11:17:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5792/12318 [10:01:59<11:18:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5792/12318 [10:01:59<11:18:16,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5793/12318 [10:02:04<11:18:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5793/12318 [10:02:04<11:18:09,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5794/12318 [10:02:06<11:17:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5794/12318 [10:02:06<11:17:58,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5795/12318 [10:02:11<11:17:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5795/12318 [10:02:11<11:17:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5796/12318 [10:02:16<11:17:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5796/12318 [10:02:16<11:17:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5797/12318 [10:02:25<11:17:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5797/12318 [10:02:25<11:17:39,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5798/12318 [10:02:32<11:17:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5798/12318 [10:02:32<11:17:33,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5799/12318 [10:02:40<11:17:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5799/12318 [10:02:40<11:17:30,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5800/12318 [10:02:41<11:17:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5800/12318 [10:02:41<11:17:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5801/12318 [10:02:48<11:17:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5801/12318 [10:02:48<11:17:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5802/12318 [10:02:53<11:17:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5802/12318 [10:02:53<11:17:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5803/12318 [10:03:01<11:17:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5803/12318 [10:03:01<11:17:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5804/12318 [10:03:03<11:16:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5804/12318 [10:03:03<11:16:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5805/12318 [10:03:06<11:16:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5805/12318 [10:03:06<11:16:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5806/12318 [10:03:13<11:16:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5806/12318 [10:03:13<11:16:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5807/12318 [10:03:19<11:16:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5807/12318 [10:03:19<11:16:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5808/12318 [10:03:25<11:16:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5808/12318 [10:03:25<11:16:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5809/12318 [10:03:34<11:16:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5809/12318 [10:03:34<11:16:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5810/12318 [10:03:41<11:16:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5810/12318 [10:03:41<11:16:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5811/12318 [10:03:48<11:16:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5811/12318 [10:03:48<11:16:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5812/12318 [10:03:55<11:16:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5812/12318 [10:03:55<11:16:01,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5813/12318 [10:04:00<11:15:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5813/12318 [10:04:00<11:15:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5814/12318 [10:04:07<11:15:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5814/12318 [10:04:07<11:15:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5815/12318 [10:04:15<11:15:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5815/12318 [10:04:15<11:15:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5816/12318 [10:04:19<11:15:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5816/12318 [10:04:19<11:15:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5817/12318 [10:04:28<11:15:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5817/12318 [10:04:28<11:15:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5818/12318 [10:04:31<11:15:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5818/12318 [10:04:31<11:15:23,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5819/12318 [10:04:34<11:15:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5819/12318 [10:04:34<11:15:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5820/12318 [10:04:36<11:15:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5820/12318 [10:04:36<11:15:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5821/12318 [10:04:44<11:14:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5821/12318 [10:04:44<11:14:58,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5822/12318 [10:04:50<11:14:51,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5822/12318 [10:04:50<11:14:51,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5823/12318 [10:04:58<11:14:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5823/12318 [10:04:58<11:14:47,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5824/12318 [10:05:17<11:14:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5824/12318 [10:05:17<11:14:55,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5825/12318 [10:05:24<11:14:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5825/12318 [10:05:24<11:14:50,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5826/12318 [10:05:30<11:14:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5826/12318 [10:05:30<11:14:43,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5827/12318 [10:05:34<11:14:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5827/12318 [10:05:34<11:14:34,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5828/12318 [10:05:42<11:14:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5828/12318 [10:05:42<11:14:31,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5829/12318 [10:05:51<11:14:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5829/12318 [10:05:51<11:14:27,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5830/12318 [10:06:00<11:14:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5830/12318 [10:06:00<11:14:24,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5831/12318 [10:06:06<11:14:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5831/12318 [10:06:06<11:14:18,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5832/12318 [10:06:09<11:14:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5832/12318 [10:06:09<11:14:08,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5833/12318 [10:06:18<11:14:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5833/12318 [10:06:18<11:14:04,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5834/12318 [10:06:23<11:13:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5834/12318 [10:06:23<11:13:57,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5835/12318 [10:06:32<11:13:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5835/12318 [10:06:32<11:13:53,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5836/12318 [10:06:35<11:13:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5836/12318 [10:06:35<11:13:44,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5837/12318 [10:06:41<11:13:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5837/12318 [10:06:41<11:13:37,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5838/12318 [10:06:46<11:13:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5838/12318 [10:06:46<11:13:30,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5839/12318 [10:06:54<11:13:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5839/12318 [10:06:54<11:13:25,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5840/12318 [10:06:58<11:13:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5840/12318 [10:06:58<11:13:17,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5841/12318 [10:07:03<11:13:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5841/12318 [10:07:03<11:13:09,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5842/12318 [10:07:07<11:13:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5842/12318 [10:07:07<11:13:00,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5843/12318 [10:07:10<11:12:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5843/12318 [10:07:10<11:12:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5844/12318 [10:07:12<11:12:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5844/12318 [10:07:12<11:12:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5845/12318 [10:07:16<11:12:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5845/12318 [10:07:16<11:12:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5846/12318 [10:07:21<11:12:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5846/12318 [10:07:21<11:12:23,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5847/12318 [10:07:23<11:12:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5847/12318 [10:07:23<11:12:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5848/12318 [10:07:24<11:12:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5848/12318 [10:07:24<11:12:01,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5849/12318 [10:07:31<11:11:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5849/12318 [10:07:31<11:11:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5850/12318 [10:07:34<11:11:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5850/12318 [10:07:34<11:11:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  47%|▍| 5851/12318 [10:07:40<11:11:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  47%|▍| 5851/12318 [10:07:40<11:11:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5852/12318 [10:07:46<11:11:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5852/12318 [10:07:46<11:11:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5853/12318 [10:07:47<11:11:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5853/12318 [10:07:47<11:11:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5854/12318 [10:07:51<11:11:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5854/12318 [10:07:51<11:11:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5855/12318 [10:07:52<11:11:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5855/12318 [10:07:52<11:11:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5856/12318 [10:08:34<11:11:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5856/12318 [10:08:34<11:11:33,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5857/12318 [10:08:36<11:11:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5857/12318 [10:08:36<11:11:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5858/12318 [10:08:42<11:11:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5858/12318 [10:08:42<11:11:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5859/12318 [10:08:47<11:11:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5859/12318 [10:08:47<11:11:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5860/12318 [10:08:56<11:11:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5860/12318 [10:08:56<11:11:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5861/12318 [10:09:04<11:11:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5861/12318 [10:09:04<11:11:00,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5862/12318 [10:09:10<11:10:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5862/12318 [10:09:10<11:10:54,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5863/12318 [10:09:13<11:10:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5863/12318 [10:09:13<11:10:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5864/12318 [10:09:18<11:10:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5864/12318 [10:09:18<11:10:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5865/12318 [10:09:22<11:10:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5865/12318 [10:09:22<11:10:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5866/12318 [10:09:31<11:10:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5866/12318 [10:09:31<11:10:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5867/12318 [10:09:40<11:10:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5867/12318 [10:09:40<11:10:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5868/12318 [10:09:48<11:10:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5868/12318 [10:09:48<11:10:16,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5869/12318 [10:09:53<11:10:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5869/12318 [10:09:53<11:10:09,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5870/12318 [10:09:56<11:10:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5870/12318 [10:09:56<11:10:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5871/12318 [10:10:02<11:09:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5871/12318 [10:10:02<11:09:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5872/12318 [10:10:08<11:09:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5872/12318 [10:10:08<11:09:47,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5873/12318 [10:10:15<11:09:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5873/12318 [10:10:15<11:09:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5874/12318 [10:10:22<11:09:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5874/12318 [10:10:22<11:09:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5875/12318 [10:10:27<11:09:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5875/12318 [10:10:27<11:09:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5876/12318 [10:10:28<11:09:16,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5876/12318 [10:10:28<11:09:16,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5877/12318 [10:10:37<11:09:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5877/12318 [10:10:37<11:09:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5878/12318 [10:10:41<11:09:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5878/12318 [10:10:41<11:09:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5879/12318 [10:10:46<11:08:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5879/12318 [10:10:46<11:08:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5880/12318 [10:10:50<11:08:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5880/12318 [10:10:50<11:08:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5881/12318 [10:10:59<11:08:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5881/12318 [10:10:59<11:08:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5882/12318 [10:11:02<11:08:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5882/12318 [10:11:02<11:08:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5883/12318 [10:11:11<11:08:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5883/12318 [10:11:11<11:08:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5884/12318 [10:11:14<11:08:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5884/12318 [10:11:14<11:08:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5885/12318 [10:11:15<11:08:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5885/12318 [10:11:15<11:08:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5886/12318 [10:11:16<11:07:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5886/12318 [10:11:16<11:07:59,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5887/12318 [10:11:21<11:07:51,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5887/12318 [10:11:21<11:07:51,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5888/12318 [10:11:46<11:08:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5888/12318 [10:11:46<11:08:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5889/12318 [10:11:55<11:08:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5889/12318 [10:11:55<11:08:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5890/12318 [10:12:01<11:07:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5890/12318 [10:12:01<11:07:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5891/12318 [10:12:06<11:07:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5891/12318 [10:12:06<11:07:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5892/12318 [10:12:08<11:07:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5892/12318 [10:12:08<11:07:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5893/12318 [10:12:13<11:07:29,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5893/12318 [10:12:13<11:07:29,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5894/12318 [10:12:17<11:07:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5894/12318 [10:12:17<11:07:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5895/12318 [10:12:22<11:07:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5895/12318 [10:12:22<11:07:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5896/12318 [10:12:25<11:07:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5896/12318 [10:12:25<11:07:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5897/12318 [10:12:32<11:06:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5897/12318 [10:12:32<11:06:58,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5898/12318 [10:12:40<11:06:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5898/12318 [10:12:40<11:06:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5899/12318 [10:12:41<11:06:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5899/12318 [10:12:41<11:06:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5900/12318 [10:12:50<11:06:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5900/12318 [10:12:50<11:06:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5901/12318 [10:12:53<11:06:29,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5901/12318 [10:12:53<11:06:29,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5902/12318 [10:13:01<11:06:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5902/12318 [10:13:01<11:06:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5903/12318 [10:13:04<11:06:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5903/12318 [10:13:04<11:06:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5904/12318 [10:13:12<11:06:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5904/12318 [10:13:12<11:06:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5905/12318 [10:13:14<11:05:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5905/12318 [10:13:14<11:05:59,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5906/12318 [10:13:19<11:05:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5906/12318 [10:13:19<11:05:52,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5907/12318 [10:13:24<11:05:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5907/12318 [10:13:24<11:05:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5908/12318 [10:13:33<11:05:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5908/12318 [10:13:33<11:05:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5909/12318 [10:13:38<11:05:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5909/12318 [10:13:38<11:05:33,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5910/12318 [10:13:45<11:05:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5910/12318 [10:13:45<11:05:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5911/12318 [10:13:48<11:05:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5911/12318 [10:13:48<11:05:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5912/12318 [10:13:54<11:05:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5912/12318 [10:13:54<11:05:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5913/12318 [10:14:00<11:05:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5913/12318 [10:14:00<11:05:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5914/12318 [10:14:05<11:04:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5914/12318 [10:14:05<11:04:58,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5915/12318 [10:14:12<11:04:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5915/12318 [10:14:12<11:04:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5916/12318 [10:14:16<11:04:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5916/12318 [10:14:16<11:04:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5917/12318 [10:14:19<11:04:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5917/12318 [10:14:19<11:04:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5918/12318 [10:14:28<11:04:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5918/12318 [10:14:28<11:04:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5919/12318 [10:14:34<11:04:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5919/12318 [10:14:34<11:04:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5920/12318 [10:15:06<11:04:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5920/12318 [10:15:06<11:04:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5921/12318 [10:15:15<11:04:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5921/12318 [10:15:15<11:04:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5922/12318 [10:15:19<11:04:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5922/12318 [10:15:19<11:04:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5923/12318 [10:15:26<11:04:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5923/12318 [10:15:26<11:04:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5924/12318 [10:15:27<11:04:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5924/12318 [10:15:27<11:04:17,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5925/12318 [10:15:29<11:04:06,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5925/12318 [10:15:29<11:04:06,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5926/12318 [10:15:34<11:03:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5926/12318 [10:15:34<11:03:58,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5927/12318 [10:15:40<11:03:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5927/12318 [10:15:40<11:03:52,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5928/12318 [10:15:46<11:03:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5928/12318 [10:15:46<11:03:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5929/12318 [10:15:50<11:03:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5929/12318 [10:15:50<11:03:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5930/12318 [10:15:57<11:03:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5930/12318 [10:15:57<11:03:31,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5931/12318 [10:16:03<11:03:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5931/12318 [10:16:03<11:03:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5932/12318 [10:16:11<11:03:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5932/12318 [10:16:11<11:03:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5933/12318 [10:16:17<11:03:14,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5933/12318 [10:16:17<11:03:14,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5934/12318 [10:16:24<11:03:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5934/12318 [10:16:24<11:03:09,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5935/12318 [10:16:28<11:03:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5935/12318 [10:16:28<11:03:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5936/12318 [10:16:32<11:02:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5936/12318 [10:16:32<11:02:52,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5937/12318 [10:16:37<11:02:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5937/12318 [10:16:37<11:02:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5938/12318 [10:16:40<11:02:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5938/12318 [10:16:40<11:02:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5939/12318 [10:16:43<11:02:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5939/12318 [10:16:43<11:02:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5940/12318 [10:16:46<11:02:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5940/12318 [10:16:46<11:02:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5941/12318 [10:16:51<11:02:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5941/12318 [10:16:51<11:02:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5942/12318 [10:16:56<11:01:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5942/12318 [10:16:56<11:01:59,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5943/12318 [10:16:58<11:01:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5943/12318 [10:16:58<11:01:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5944/12318 [10:17:02<11:01:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5944/12318 [10:17:02<11:01:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5945/12318 [10:17:09<11:01:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5945/12318 [10:17:09<11:01:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5946/12318 [10:17:14<11:01:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5946/12318 [10:17:14<11:01:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5947/12318 [10:17:22<11:01:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5947/12318 [10:17:22<11:01:23,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5948/12318 [10:17:24<11:01:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5948/12318 [10:17:24<11:01:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5949/12318 [10:17:29<11:01:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5949/12318 [10:17:29<11:01:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5950/12318 [10:17:32<11:00:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5950/12318 [10:17:32<11:00:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5951/12318 [10:17:35<11:00:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5951/12318 [10:17:35<11:00:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5952/12318 [10:18:13<11:01:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5952/12318 [10:18:13<11:01:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5953/12318 [10:18:22<11:01:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5953/12318 [10:18:22<11:01:09,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5954/12318 [10:18:28<11:01:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5954/12318 [10:18:28<11:01:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5955/12318 [10:18:32<11:00:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5955/12318 [10:18:32<11:00:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5956/12318 [10:18:34<11:00:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5956/12318 [10:18:34<11:00:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5957/12318 [10:18:41<11:00:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5957/12318 [10:18:41<11:00:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5958/12318 [10:18:47<11:00:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5958/12318 [10:18:47<11:00:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5959/12318 [10:18:48<11:00:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5959/12318 [10:18:48<11:00:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5960/12318 [10:18:51<11:00:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5960/12318 [10:18:51<11:00:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5961/12318 [10:18:53<11:00:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5961/12318 [10:18:53<11:00:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5962/12318 [10:18:56<10:59:51,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5962/12318 [10:18:56<10:59:51,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5963/12318 [10:19:02<10:59:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5963/12318 [10:19:02<10:59:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5964/12318 [10:19:08<10:59:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5964/12318 [10:19:08<10:59:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5965/12318 [10:19:13<10:59:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5965/12318 [10:19:13<10:59:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5966/12318 [10:19:18<10:59:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5966/12318 [10:19:18<10:59:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5967/12318 [10:19:25<10:59:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5967/12318 [10:19:25<10:59:17,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5968/12318 [10:19:33<10:59:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5968/12318 [10:19:33<10:59:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5969/12318 [10:19:37<10:59:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5969/12318 [10:19:37<10:59:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5970/12318 [10:19:41<10:58:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5970/12318 [10:19:41<10:58:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5971/12318 [10:19:47<10:58:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5971/12318 [10:19:47<10:58:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5972/12318 [10:19:55<10:58:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5972/12318 [10:19:55<10:58:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5973/12318 [10:19:59<10:58:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5973/12318 [10:19:59<10:58:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  48%|▍| 5974/12318 [10:20:06<10:58:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  48%|▍| 5974/12318 [10:20:06<10:58:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5975/12318 [10:20:14<10:58:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5975/12318 [10:20:14<10:58:26,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5976/12318 [10:20:18<10:58:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5976/12318 [10:20:18<10:58:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5977/12318 [10:20:23<10:58:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5977/12318 [10:20:23<10:58:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5978/12318 [10:20:27<10:58:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5978/12318 [10:20:28<10:58:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5979/12318 [10:20:31<10:57:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5979/12318 [10:20:31<10:57:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5980/12318 [10:20:33<10:57:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5980/12318 [10:20:33<10:57:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5981/12318 [10:20:38<10:57:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5981/12318 [10:20:38<10:57:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5982/12318 [10:20:44<10:57:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5982/12318 [10:20:44<10:57:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5983/12318 [10:20:49<10:57:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5983/12318 [10:20:49<10:57:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5984/12318 [10:21:27<10:57:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5984/12318 [10:21:27<10:57:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5985/12318 [10:21:33<10:57:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5985/12318 [10:21:33<10:57:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5986/12318 [10:21:41<10:57:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5986/12318 [10:21:41<10:57:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5987/12318 [10:21:49<10:57:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5987/12318 [10:21:49<10:57:33,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5988/12318 [10:21:56<10:57:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5988/12318 [10:21:56<10:57:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5989/12318 [10:22:00<10:57:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5989/12318 [10:22:00<10:57:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5990/12318 [10:22:07<10:57:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5990/12318 [10:22:07<10:57:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5991/12318 [10:22:10<10:57:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5991/12318 [10:22:10<10:57:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5992/12318 [10:22:18<10:57:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5992/12318 [10:22:18<10:57:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5993/12318 [10:22:22<10:56:51,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5993/12318 [10:22:22<10:56:51,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5994/12318 [10:22:31<10:56:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5994/12318 [10:22:31<10:56:47,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5995/12318 [10:22:38<10:56:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5995/12318 [10:22:38<10:56:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5996/12318 [10:22:44<10:56:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5996/12318 [10:22:44<10:56:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5997/12318 [10:22:51<10:56:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5997/12318 [10:22:51<10:56:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5998/12318 [10:22:56<10:56:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5998/12318 [10:22:56<10:56:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 5999/12318 [10:23:01<10:56:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 5999/12318 [10:23:01<10:56:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6000/12318 [10:23:09<10:56:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6000/12318 [10:23:09<10:56:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6001/12318 [10:23:12<10:56:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6001/12318 [10:23:12<10:56:01,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6002/12318 [10:23:13<10:55:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6002/12318 [10:23:13<10:55:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6003/12318 [10:23:19<10:55:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6003/12318 [10:23:19<10:55:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6004/12318 [10:23:24<10:55:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6004/12318 [10:23:24<10:55:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6005/12318 [10:23:30<10:55:29,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6005/12318 [10:23:30<10:55:29,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6006/12318 [10:23:37<10:55:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6006/12318 [10:23:37<10:55:23,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6007/12318 [10:23:44<10:55:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6007/12318 [10:23:44<10:55:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6008/12318 [10:23:49<10:55:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6008/12318 [10:23:49<10:55:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6009/12318 [10:23:52<10:55:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6009/12318 [10:23:52<10:55:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6010/12318 [10:23:57<10:54:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6010/12318 [10:23:57<10:54:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6011/12318 [10:23:59<10:54:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6011/12318 [10:23:59<10:54:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6012/12318 [10:24:04<10:54:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6012/12318 [10:24:04<10:54:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6013/12318 [10:24:06<10:54:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6013/12318 [10:24:06<10:54:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6014/12318 [10:24:13<10:54:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6014/12318 [10:24:13<10:54:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6015/12318 [10:24:14<10:54:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6015/12318 [10:24:14<10:54:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6016/12318 [10:24:46<10:54:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6016/12318 [10:24:46<10:54:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6017/12318 [10:24:52<10:54:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6017/12318 [10:24:52<10:54:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6018/12318 [10:24:57<10:54:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6018/12318 [10:24:57<10:54:14,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6019/12318 [10:25:00<10:54:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6019/12318 [10:25:00<10:54:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6020/12318 [10:25:04<10:53:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6020/12318 [10:25:04<10:53:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6021/12318 [10:25:07<10:53:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6021/12318 [10:25:07<10:53:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6022/12318 [10:25:14<10:53:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6022/12318 [10:25:14<10:53:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6023/12318 [10:25:16<10:53:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6023/12318 [10:25:16<10:53:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6024/12318 [10:25:19<10:53:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6024/12318 [10:25:19<10:53:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6025/12318 [10:25:22<10:53:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6025/12318 [10:25:22<10:53:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6026/12318 [10:25:24<10:53:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6026/12318 [10:25:24<10:53:01,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6027/12318 [10:25:32<10:52:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6027/12318 [10:25:32<10:52:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6028/12318 [10:25:39<10:52:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6028/12318 [10:25:39<10:52:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6029/12318 [10:25:44<10:52:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6029/12318 [10:25:44<10:52:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6030/12318 [10:25:53<10:52:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6030/12318 [10:25:53<10:52:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6031/12318 [10:26:02<10:52:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6031/12318 [10:26:02<10:52:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6032/12318 [10:26:08<10:52:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6032/12318 [10:26:08<10:52:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6033/12318 [10:26:10<10:52:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6033/12318 [10:26:10<10:52:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6034/12318 [10:26:15<10:52:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6034/12318 [10:26:15<10:52:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6035/12318 [10:26:19<10:52:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6035/12318 [10:26:19<10:52:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6036/12318 [10:26:24<10:51:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6036/12318 [10:26:24<10:51:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6037/12318 [10:26:27<10:51:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6037/12318 [10:26:27<10:51:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6038/12318 [10:26:34<10:51:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6038/12318 [10:26:34<10:51:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6039/12318 [10:26:39<10:51:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6039/12318 [10:26:39<10:51:33,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6040/12318 [10:26:46<10:51:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6040/12318 [10:26:46<10:51:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6041/12318 [10:26:51<10:51:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6041/12318 [10:26:51<10:51:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6042/12318 [10:26:56<10:51:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6042/12318 [10:26:56<10:51:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6043/12318 [10:26:59<10:51:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6043/12318 [10:26:59<10:51:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6044/12318 [10:27:07<10:50:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6044/12318 [10:27:07<10:50:59,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6045/12318 [10:27:11<10:50:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6045/12318 [10:27:11<10:50:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6046/12318 [10:27:20<10:50:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6046/12318 [10:27:20<10:50:47,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6047/12318 [10:27:27<10:50:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6047/12318 [10:27:27<10:50:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6048/12318 [10:27:50<10:50:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6048/12318 [10:27:50<10:50:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6049/12318 [10:27:54<10:50:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6049/12318 [10:27:54<10:50:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6050/12318 [10:28:01<10:50:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6050/12318 [10:28:01<10:50:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6051/12318 [10:28:08<10:50:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6051/12318 [10:28:08<10:50:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6052/12318 [10:28:15<10:50:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6052/12318 [10:28:15<10:50:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6053/12318 [10:28:17<10:50:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6053/12318 [10:28:17<10:50:17,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6054/12318 [10:28:23<10:50:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6054/12318 [10:28:23<10:50:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6055/12318 [10:28:29<10:50:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6055/12318 [10:28:29<10:50:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6056/12318 [10:28:35<10:49:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6056/12318 [10:28:35<10:49:58,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6057/12318 [10:28:37<10:49:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6057/12318 [10:28:37<10:49:47,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6058/12318 [10:28:41<10:49:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6058/12318 [10:28:41<10:49:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6059/12318 [10:28:44<10:49:29,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6059/12318 [10:28:44<10:49:29,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6060/12318 [10:28:48<10:49:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6060/12318 [10:28:48<10:49:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6061/12318 [10:28:55<10:49:16,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6061/12318 [10:28:55<10:49:16,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6062/12318 [10:28:59<10:49:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6062/12318 [10:28:59<10:49:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6063/12318 [10:29:06<10:49:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6063/12318 [10:29:06<10:49:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6064/12318 [10:29:11<10:48:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6064/12318 [10:29:11<10:48:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6065/12318 [10:29:18<10:48:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6065/12318 [10:29:18<10:48:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6066/12318 [10:29:25<10:48:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6066/12318 [10:29:25<10:48:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6067/12318 [10:29:32<10:48:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6067/12318 [10:29:32<10:48:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6068/12318 [10:29:39<10:48:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6068/12318 [10:29:39<10:48:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6069/12318 [10:29:46<10:48:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6069/12318 [10:29:46<10:48:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6070/12318 [10:29:51<10:48:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6070/12318 [10:29:51<10:48:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6071/12318 [10:29:55<10:48:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6071/12318 [10:29:55<10:48:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6072/12318 [10:30:04<10:48:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6072/12318 [10:30:04<10:48:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6073/12318 [10:30:09<10:48:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6073/12318 [10:30:09<10:48:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6074/12318 [10:30:13<10:47:51,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6074/12318 [10:30:13<10:47:51,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6075/12318 [10:30:18<10:47:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6075/12318 [10:30:18<10:47:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6076/12318 [10:30:20<10:47:33,  6.22s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6076/12318 [10:30:20<10:47:33,  6.22s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6077/12318 [10:30:21<10:47:22,  6.22s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6077/12318 [10:30:21<10:47:22,  6.22s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6078/12318 [10:30:24<10:47:12,  6.22s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6078/12318 [10:30:24<10:47:12,  6.22s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6079/12318 [10:30:28<10:47:03,  6.22s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6079/12318 [10:30:28<10:47:03,  6.22s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6080/12318 [10:31:12<10:47:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6080/12318 [10:31:12<10:47:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6081/12318 [10:31:20<10:47:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6081/12318 [10:31:20<10:47:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6082/12318 [10:31:25<10:47:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6082/12318 [10:31:25<10:47:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6083/12318 [10:31:30<10:47:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6083/12318 [10:31:30<10:47:17,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6084/12318 [10:31:36<10:47:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6084/12318 [10:31:36<10:47:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6085/12318 [10:31:40<10:47:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6085/12318 [10:31:40<10:47:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6086/12318 [10:31:45<10:46:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6086/12318 [10:31:45<10:46:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6087/12318 [10:31:47<10:46:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6087/12318 [10:31:47<10:46:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6088/12318 [10:31:56<10:46:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6088/12318 [10:31:56<10:46:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6089/12318 [10:32:03<10:46:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6089/12318 [10:32:03<10:46:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6090/12318 [10:32:05<10:46:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6090/12318 [10:32:05<10:46:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6091/12318 [10:32:12<10:46:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6091/12318 [10:32:12<10:46:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6092/12318 [10:32:16<10:46:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6092/12318 [10:32:16<10:46:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6093/12318 [10:32:21<10:46:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6093/12318 [10:32:21<10:46:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6094/12318 [10:32:24<10:45:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6094/12318 [10:32:24<10:45:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6095/12318 [10:32:33<10:45:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6095/12318 [10:32:33<10:45:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6096/12318 [10:32:38<10:45:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6096/12318 [10:32:38<10:45:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  49%|▍| 6097/12318 [10:32:42<10:45:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  49%|▍| 6097/12318 [10:32:42<10:45:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6098/12318 [10:32:48<10:45:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6098/12318 [10:32:48<10:45:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6099/12318 [10:32:54<10:45:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6099/12318 [10:32:54<10:45:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6100/12318 [10:32:58<10:45:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6100/12318 [10:32:58<10:45:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6101/12318 [10:33:03<10:45:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6101/12318 [10:33:03<10:45:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6102/12318 [10:33:12<10:45:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6102/12318 [10:33:12<10:45:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6103/12318 [10:33:19<10:44:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6103/12318 [10:33:19<10:44:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6104/12318 [10:33:25<10:44:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6104/12318 [10:33:25<10:44:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6105/12318 [10:33:30<10:44:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6105/12318 [10:33:30<10:44:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6106/12318 [10:33:38<10:44:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6106/12318 [10:33:38<10:44:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6107/12318 [10:33:43<10:44:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6107/12318 [10:33:43<10:44:31,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6108/12318 [10:33:52<10:44:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6108/12318 [10:33:52<10:44:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6109/12318 [10:33:58<10:44:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6109/12318 [10:33:58<10:44:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6110/12318 [10:34:04<10:44:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6110/12318 [10:34:04<10:44:14,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6111/12318 [10:34:11<10:44:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6111/12318 [10:34:11<10:44:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6112/12318 [10:34:36<10:44:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6112/12318 [10:34:36<10:44:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6113/12318 [10:34:43<10:44:16,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6113/12318 [10:34:43<10:44:16,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6114/12318 [10:34:45<10:44:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6114/12318 [10:34:45<10:44:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6115/12318 [10:34:53<10:44:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6115/12318 [10:34:53<10:44:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6116/12318 [10:35:01<10:43:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6116/12318 [10:35:01<10:43:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6117/12318 [10:35:02<10:43:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6117/12318 [10:35:02<10:43:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6118/12318 [10:35:06<10:43:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6118/12318 [10:35:06<10:43:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6119/12318 [10:35:13<10:43:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6119/12318 [10:35:13<10:43:31,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6120/12318 [10:35:19<10:43:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6120/12318 [10:35:19<10:43:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6121/12318 [10:35:28<10:43:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6121/12318 [10:35:28<10:43:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6122/12318 [10:35:32<10:43:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6122/12318 [10:35:32<10:43:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6123/12318 [10:35:35<10:43:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6123/12318 [10:35:35<10:43:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6124/12318 [10:35:39<10:42:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6124/12318 [10:35:39<10:42:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6125/12318 [10:35:46<10:42:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6125/12318 [10:35:46<10:42:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6126/12318 [10:35:54<10:42:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6126/12318 [10:35:54<10:42:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6127/12318 [10:35:58<10:42:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6127/12318 [10:35:58<10:42:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6128/12318 [10:36:05<10:42:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6128/12318 [10:36:05<10:42:31,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6129/12318 [10:36:08<10:42:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6129/12318 [10:36:08<10:42:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6130/12318 [10:36:10<10:42:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6130/12318 [10:36:10<10:42:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6131/12318 [10:36:19<10:42:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6131/12318 [10:36:19<10:42:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6132/12318 [10:36:25<10:42:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6132/12318 [10:36:25<10:42:01,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6133/12318 [10:36:26<10:41:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6133/12318 [10:36:26<10:41:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6134/12318 [10:36:33<10:41:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6134/12318 [10:36:33<10:41:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6135/12318 [10:36:38<10:41:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6135/12318 [10:36:38<10:41:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6136/12318 [10:36:45<10:41:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6136/12318 [10:36:45<10:41:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6137/12318 [10:36:51<10:41:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6137/12318 [10:36:51<10:41:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6138/12318 [10:36:58<10:41:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6138/12318 [10:36:58<10:41:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6139/12318 [10:37:07<10:41:16,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6139/12318 [10:37:07<10:41:16,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6140/12318 [10:37:12<10:41:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6140/12318 [10:37:12<10:41:09,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6141/12318 [10:37:16<10:41:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6141/12318 [10:37:16<10:41:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6142/12318 [10:37:19<10:40:51,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6142/12318 [10:37:19<10:40:51,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6143/12318 [10:37:26<10:40:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6143/12318 [10:37:26<10:40:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6144/12318 [10:37:52<10:40:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6144/12318 [10:37:52<10:40:59,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6145/12318 [10:38:01<10:40:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6145/12318 [10:38:01<10:40:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6146/12318 [10:38:07<10:40:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6146/12318 [10:38:07<10:40:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6147/12318 [10:38:13<10:40:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6147/12318 [10:38:13<10:40:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6148/12318 [10:38:21<10:40:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6148/12318 [10:38:21<10:40:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6149/12318 [10:38:30<10:40:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6149/12318 [10:38:30<10:40:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6150/12318 [10:38:36<10:40:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6150/12318 [10:38:36<10:40:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6151/12318 [10:38:43<10:40:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6151/12318 [10:38:43<10:40:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6152/12318 [10:38:48<10:40:16,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6152/12318 [10:38:48<10:40:16,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6153/12318 [10:38:57<10:40:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6153/12318 [10:38:57<10:40:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6154/12318 [10:39:05<10:40:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6154/12318 [10:39:05<10:40:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6155/12318 [10:39:07<10:39:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6155/12318 [10:39:07<10:39:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6156/12318 [10:39:16<10:39:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6156/12318 [10:39:16<10:39:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6157/12318 [10:39:20<10:39:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6157/12318 [10:39:20<10:39:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▍| 6158/12318 [10:39:25<10:39:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▍| 6158/12318 [10:39:25<10:39:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6159/12318 [10:39:33<10:39:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6159/12318 [10:39:33<10:39:33,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6160/12318 [10:39:38<10:39:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6160/12318 [10:39:38<10:39:26,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6161/12318 [10:39:46<10:39:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6161/12318 [10:39:46<10:39:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6162/12318 [10:39:49<10:39:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6162/12318 [10:39:49<10:39:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6163/12318 [10:39:57<10:39:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6163/12318 [10:39:57<10:39:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6164/12318 [10:40:06<10:39:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6164/12318 [10:40:06<10:39:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6165/12318 [10:40:09<10:38:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6165/12318 [10:40:09<10:38:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6166/12318 [10:40:12<10:38:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6166/12318 [10:40:12<10:38:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6167/12318 [10:40:16<10:38:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6167/12318 [10:40:16<10:38:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6168/12318 [10:40:19<10:38:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6168/12318 [10:40:19<10:38:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6169/12318 [10:40:24<10:38:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6169/12318 [10:40:24<10:38:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6170/12318 [10:40:27<10:38:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6170/12318 [10:40:27<10:38:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6171/12318 [10:40:35<10:38:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6171/12318 [10:40:35<10:38:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6172/12318 [10:40:36<10:37:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6172/12318 [10:40:36<10:37:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6173/12318 [10:40:43<10:37:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6173/12318 [10:40:43<10:37:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6174/12318 [10:40:50<10:37:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6174/12318 [10:40:50<10:37:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6175/12318 [10:40:53<10:37:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6175/12318 [10:40:53<10:37:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6176/12318 [10:41:21<10:37:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6176/12318 [10:41:21<10:37:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6177/12318 [10:41:25<10:37:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6177/12318 [10:41:25<10:37:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6178/12318 [10:41:31<10:37:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6178/12318 [10:41:31<10:37:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6179/12318 [10:41:39<10:37:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6179/12318 [10:41:39<10:37:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6180/12318 [10:41:45<10:37:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6180/12318 [10:41:45<10:37:23,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6181/12318 [10:41:46<10:37:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6181/12318 [10:41:46<10:37:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6182/12318 [10:41:51<10:37:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6182/12318 [10:41:51<10:37:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6183/12318 [10:41:59<10:36:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6183/12318 [10:41:59<10:36:59,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6184/12318 [10:42:07<10:36:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6184/12318 [10:42:07<10:36:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6185/12318 [10:42:12<10:36:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6185/12318 [10:42:12<10:36:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6186/12318 [10:42:21<10:36:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6186/12318 [10:42:21<10:36:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6187/12318 [10:42:25<10:36:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6187/12318 [10:42:25<10:36:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6188/12318 [10:42:28<10:36:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6188/12318 [10:42:28<10:36:26,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6189/12318 [10:42:30<10:36:16,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6189/12318 [10:42:30<10:36:16,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6190/12318 [10:42:35<10:36:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6190/12318 [10:42:35<10:36:09,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6191/12318 [10:42:41<10:36:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6191/12318 [10:42:41<10:36:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6192/12318 [10:42:45<10:35:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6192/12318 [10:42:45<10:35:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6193/12318 [10:42:51<10:35:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6193/12318 [10:42:51<10:35:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6194/12318 [10:43:00<10:35:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6194/12318 [10:43:00<10:35:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6195/12318 [10:43:07<10:35:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6195/12318 [10:43:07<10:35:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6196/12318 [10:43:12<10:35:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6196/12318 [10:43:12<10:35:31,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6197/12318 [10:43:14<10:35:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6197/12318 [10:43:14<10:35:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6198/12318 [10:43:17<10:35:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6198/12318 [10:43:17<10:35:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6199/12318 [10:43:23<10:35:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6199/12318 [10:43:23<10:35:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6200/12318 [10:43:32<10:35:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6200/12318 [10:43:32<10:35:01,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6201/12318 [10:43:36<10:34:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6201/12318 [10:43:36<10:34:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6202/12318 [10:43:41<10:34:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6202/12318 [10:43:41<10:34:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6203/12318 [10:43:47<10:34:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6203/12318 [10:43:47<10:34:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6204/12318 [10:43:54<10:34:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6204/12318 [10:43:54<10:34:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6205/12318 [10:43:58<10:34:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6205/12318 [10:43:58<10:34:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6206/12318 [10:44:05<10:34:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6206/12318 [10:44:05<10:34:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6207/12318 [10:44:10<10:34:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6207/12318 [10:44:10<10:34:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6208/12318 [10:44:44<10:34:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6208/12318 [10:44:44<10:34:33,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6209/12318 [10:44:48<10:34:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6209/12318 [10:44:48<10:34:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6210/12318 [10:44:57<10:34:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6210/12318 [10:44:57<10:34:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6211/12318 [10:45:03<10:34:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6211/12318 [10:45:03<10:34:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6212/12318 [10:45:07<10:34:06,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6212/12318 [10:45:07<10:34:06,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6213/12318 [10:45:15<10:34:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6213/12318 [10:45:15<10:34:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6214/12318 [10:45:19<10:33:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6214/12318 [10:45:19<10:33:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6215/12318 [10:45:28<10:33:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6215/12318 [10:45:28<10:33:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6216/12318 [10:45:31<10:33:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6216/12318 [10:45:31<10:33:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6217/12318 [10:45:39<10:33:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6217/12318 [10:45:39<10:33:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6218/12318 [10:45:45<10:33:29,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6218/12318 [10:45:45<10:33:29,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6219/12318 [10:45:48<10:33:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6219/12318 [10:45:48<10:33:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  50%|▌| 6220/12318 [10:45:53<10:33:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  50%|▌| 6220/12318 [10:45:53<10:33:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6221/12318 [10:45:56<10:33:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6221/12318 [10:45:56<10:33:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6222/12318 [10:46:01<10:32:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6222/12318 [10:46:01<10:32:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6223/12318 [10:46:05<10:32:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6223/12318 [10:46:05<10:32:47,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6224/12318 [10:46:08<10:32:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6224/12318 [10:46:08<10:32:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6225/12318 [10:46:11<10:32:29,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6225/12318 [10:46:11<10:32:29,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6226/12318 [10:46:12<10:32:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6226/12318 [10:46:12<10:32:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6227/12318 [10:46:19<10:32:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6227/12318 [10:46:19<10:32:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6228/12318 [10:46:22<10:32:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6228/12318 [10:46:22<10:32:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6229/12318 [10:46:26<10:31:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6229/12318 [10:46:26<10:31:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6230/12318 [10:46:30<10:31:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6230/12318 [10:46:30<10:31:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6231/12318 [10:46:36<10:31:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6231/12318 [10:46:36<10:31:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6232/12318 [10:46:40<10:31:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6232/12318 [10:46:40<10:31:31,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6233/12318 [10:46:42<10:31:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6233/12318 [10:46:42<10:31:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6234/12318 [10:46:49<10:31:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6234/12318 [10:46:49<10:31:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6235/12318 [10:46:50<10:31:04,  6.22s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6235/12318 [10:46:50<10:31:04,  6.22s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6236/12318 [10:46:53<10:30:54,  6.22s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6236/12318 [10:46:53<10:30:54,  6.22s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6237/12318 [10:46:54<10:30:43,  6.22s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6237/12318 [10:46:54<10:30:43,  6.22s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6238/12318 [10:47:02<10:30:39,  6.22s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6238/12318 [10:47:02<10:30:39,  6.22s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6239/12318 [10:47:07<10:30:31,  6.22s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6239/12318 [10:47:07<10:30:31,  6.22s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6240/12318 [10:48:10<10:31:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6240/12318 [10:48:10<10:31:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6241/12318 [10:48:15<10:31:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6241/12318 [10:48:15<10:31:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6242/12318 [10:48:21<10:31:06,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6242/12318 [10:48:21<10:31:06,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6243/12318 [10:48:22<10:30:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6243/12318 [10:48:22<10:30:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6244/12318 [10:48:27<10:30:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6244/12318 [10:48:28<10:30:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6245/12318 [10:48:36<10:30:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6245/12318 [10:48:36<10:30:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6246/12318 [10:48:44<10:30:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6246/12318 [10:48:44<10:30:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6247/12318 [10:48:50<10:30:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6247/12318 [10:48:50<10:30:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6248/12318 [10:48:59<10:30:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6248/12318 [10:48:59<10:30:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6249/12318 [10:49:03<10:30:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6249/12318 [10:49:03<10:30:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6250/12318 [10:49:08<10:30:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6250/12318 [10:49:08<10:30:14,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6251/12318 [10:49:10<10:30:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6251/12318 [10:49:10<10:30:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6252/12318 [10:49:14<10:29:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6252/12318 [10:49:14<10:29:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6253/12318 [10:49:18<10:29:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6253/12318 [10:49:18<10:29:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6254/12318 [10:49:24<10:29:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6254/12318 [10:49:24<10:29:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6255/12318 [10:49:31<10:29:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6255/12318 [10:49:31<10:29:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6256/12318 [10:49:39<10:29:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6256/12318 [10:49:39<10:29:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6257/12318 [10:49:43<10:29:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6257/12318 [10:49:43<10:29:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6258/12318 [10:49:52<10:29:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6258/12318 [10:49:52<10:29:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6259/12318 [10:49:57<10:29:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6259/12318 [10:49:57<10:29:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6260/12318 [10:50:06<10:29:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6260/12318 [10:50:06<10:29:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6261/12318 [10:50:08<10:28:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6261/12318 [10:50:08<10:28:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6262/12318 [10:50:12<10:28:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6262/12318 [10:50:12<10:28:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6263/12318 [10:50:19<10:28:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6263/12318 [10:50:19<10:28:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6264/12318 [10:50:22<10:28:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6264/12318 [10:50:22<10:28:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6265/12318 [10:50:25<10:28:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6265/12318 [10:50:25<10:28:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6266/12318 [10:50:33<10:28:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6266/12318 [10:50:33<10:28:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6267/12318 [10:50:35<10:28:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6267/12318 [10:50:35<10:28:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6268/12318 [10:50:39<10:28:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6268/12318 [10:50:39<10:28:01,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6269/12318 [10:50:40<10:27:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6269/12318 [10:50:40<10:27:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6270/12318 [10:50:45<10:27:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6270/12318 [10:50:45<10:27:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6271/12318 [10:50:50<10:27:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6271/12318 [10:50:50<10:27:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6272/12318 [10:51:37<10:28:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6272/12318 [10:51:37<10:28:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6273/12318 [10:51:38<10:27:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6273/12318 [10:51:38<10:27:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6274/12318 [10:51:42<10:27:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6274/12318 [10:51:42<10:27:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6275/12318 [10:51:51<10:27:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6275/12318 [10:51:51<10:27:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6276/12318 [10:51:54<10:27:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6276/12318 [10:51:54<10:27:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6277/12318 [10:52:03<10:27:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6277/12318 [10:52:03<10:27:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6278/12318 [10:52:09<10:27:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6278/12318 [10:52:09<10:27:26,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6279/12318 [10:52:14<10:27:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6279/12318 [10:52:14<10:27:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6280/12318 [10:52:23<10:27:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6280/12318 [10:52:23<10:27:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6281/12318 [10:52:28<10:27:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6281/12318 [10:52:28<10:27:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6282/12318 [10:52:35<10:27:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6282/12318 [10:52:35<10:27:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6283/12318 [10:52:40<10:26:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6283/12318 [10:52:40<10:26:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6284/12318 [10:52:45<10:26:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6284/12318 [10:52:45<10:26:47,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6285/12318 [10:52:48<10:26:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6285/12318 [10:52:48<10:26:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6286/12318 [10:52:55<10:26:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6286/12318 [10:52:55<10:26:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6287/12318 [10:53:04<10:26:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6287/12318 [10:53:04<10:26:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6288/12318 [10:53:10<10:26:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6288/12318 [10:53:10<10:26:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6289/12318 [10:53:18<10:26:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6289/12318 [10:53:18<10:26:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6290/12318 [10:53:26<10:26:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6290/12318 [10:53:26<10:26:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6291/12318 [10:53:33<10:26:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6291/12318 [10:53:33<10:26:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6292/12318 [10:53:41<10:26:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6292/12318 [10:53:41<10:26:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6293/12318 [10:53:43<10:25:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6293/12318 [10:53:43<10:25:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6294/12318 [10:53:49<10:25:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6294/12318 [10:53:49<10:25:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6295/12318 [10:53:52<10:25:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6295/12318 [10:53:52<10:25:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6296/12318 [10:53:56<10:25:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6296/12318 [10:53:56<10:25:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6297/12318 [10:54:00<10:25:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6297/12318 [10:54:00<10:25:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6298/12318 [10:54:08<10:25:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6298/12318 [10:54:08<10:25:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6299/12318 [10:54:17<10:25:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6299/12318 [10:54:17<10:25:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6300/12318 [10:54:21<10:25:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6300/12318 [10:54:21<10:25:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6301/12318 [10:54:26<10:24:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6301/12318 [10:54:26<10:24:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6302/12318 [10:54:32<10:24:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6302/12318 [10:54:32<10:24:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6303/12318 [10:54:37<10:24:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6303/12318 [10:54:37<10:24:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6304/12318 [10:54:51<10:24:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6304/12318 [10:54:51<10:24:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6305/12318 [10:54:56<10:24:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6305/12318 [10:54:56<10:24:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6306/12318 [10:55:01<10:24:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6306/12318 [10:55:01<10:24:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6307/12318 [10:55:02<10:24:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6307/12318 [10:55:02<10:24:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6308/12318 [10:55:10<10:24:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6308/12318 [10:55:10<10:24:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6309/12318 [10:55:12<10:24:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6309/12318 [10:55:12<10:24:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6310/12318 [10:55:18<10:23:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6310/12318 [10:55:18<10:23:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6311/12318 [10:55:25<10:23:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6311/12318 [10:55:25<10:23:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6312/12318 [10:55:32<10:23:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6312/12318 [10:55:32<10:23:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6313/12318 [10:55:38<10:23:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6313/12318 [10:55:38<10:23:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6314/12318 [10:55:42<10:23:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6314/12318 [10:55:42<10:23:31,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6315/12318 [10:55:50<10:23:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6315/12318 [10:55:50<10:23:26,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6316/12318 [10:55:59<10:23:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6316/12318 [10:55:59<10:23:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6317/12318 [10:56:07<10:23:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6317/12318 [10:56:07<10:23:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6318/12318 [10:56:14<10:23:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6318/12318 [10:56:14<10:23:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6319/12318 [10:56:20<10:23:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6319/12318 [10:56:20<10:23:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6320/12318 [10:56:27<10:23:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6320/12318 [10:56:27<10:23:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6321/12318 [10:56:32<10:22:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6321/12318 [10:56:32<10:22:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6322/12318 [10:56:40<10:22:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6322/12318 [10:56:40<10:22:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6323/12318 [10:56:48<10:22:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6323/12318 [10:56:48<10:22:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6324/12318 [10:56:54<10:22:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6324/12318 [10:56:54<10:22:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6325/12318 [10:56:57<10:22:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6325/12318 [10:56:57<10:22:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6326/12318 [10:57:02<10:22:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6326/12318 [10:57:02<10:22:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6327/12318 [10:57:11<10:22:17,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6327/12318 [10:57:11<10:22:17,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6328/12318 [10:57:17<10:22:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6328/12318 [10:57:17<10:22:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6329/12318 [10:57:25<10:22:06,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6329/12318 [10:57:25<10:22:06,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6330/12318 [10:57:34<10:22:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6330/12318 [10:57:34<10:22:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6331/12318 [10:57:42<10:21:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6331/12318 [10:57:42<10:21:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6332/12318 [10:57:47<10:21:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6332/12318 [10:57:47<10:21:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6333/12318 [10:57:53<10:21:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6333/12318 [10:57:53<10:21:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6334/12318 [10:57:54<10:21:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6334/12318 [10:57:54<10:21:33,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6335/12318 [10:57:59<10:21:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6335/12318 [10:57:59<10:21:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6336/12318 [10:58:17<10:21:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6336/12318 [10:58:17<10:21:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6337/12318 [10:58:24<10:21:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6337/12318 [10:58:24<10:21:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6338/12318 [10:58:33<10:21:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6338/12318 [10:58:33<10:21:21,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6339/12318 [10:58:39<10:21:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6339/12318 [10:58:39<10:21:14,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6340/12318 [10:58:47<10:21:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6340/12318 [10:58:47<10:21:10,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6341/12318 [10:58:49<10:21:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6341/12318 [10:58:49<10:21:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6342/12318 [10:58:53<10:20:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6342/12318 [10:58:53<10:20:52,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  51%|▌| 6343/12318 [10:59:00<10:20:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  51%|▌| 6343/12318 [10:59:00<10:20:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6344/12318 [10:59:05<10:20:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6344/12318 [10:59:05<10:20:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6345/12318 [10:59:07<10:20:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6345/12318 [10:59:07<10:20:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6346/12318 [10:59:14<10:20:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6346/12318 [10:59:14<10:20:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6347/12318 [10:59:22<10:20:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6347/12318 [10:59:22<10:20:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6348/12318 [10:59:30<10:20:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6348/12318 [10:59:30<10:20:14,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6349/12318 [10:59:36<10:20:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6349/12318 [10:59:36<10:20:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6350/12318 [10:59:38<10:19:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6350/12318 [10:59:38<10:19:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6351/12318 [10:59:42<10:19:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6351/12318 [10:59:42<10:19:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6352/12318 [10:59:45<10:19:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6352/12318 [10:59:45<10:19:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6353/12318 [10:59:51<10:19:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6353/12318 [10:59:51<10:19:33,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6354/12318 [10:59:53<10:19:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6354/12318 [10:59:53<10:19:23,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6355/12318 [10:59:54<10:19:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6355/12318 [10:59:54<10:19:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6356/12318 [11:00:03<10:19:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6356/12318 [11:00:03<10:19:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6357/12318 [11:00:10<10:19:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6357/12318 [11:00:10<10:19:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6358/12318 [11:00:13<10:18:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6358/12318 [11:00:13<10:18:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6359/12318 [11:00:17<10:18:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6359/12318 [11:00:17<10:18:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6360/12318 [11:00:25<10:18:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6360/12318 [11:00:25<10:18:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6361/12318 [11:00:29<10:18:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6361/12318 [11:00:29<10:18:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6362/12318 [11:00:37<10:18:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6362/12318 [11:00:37<10:18:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6363/12318 [11:00:41<10:18:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6363/12318 [11:00:41<10:18:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6364/12318 [11:00:47<10:18:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6364/12318 [11:00:47<10:18:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6365/12318 [11:00:48<10:18:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6365/12318 [11:00:48<10:18:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6366/12318 [11:00:57<10:17:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6366/12318 [11:00:57<10:17:58,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6367/12318 [11:01:01<10:17:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6367/12318 [11:01:01<10:17:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6368/12318 [11:01:28<10:18:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6368/12318 [11:01:28<10:18:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6369/12318 [11:01:35<10:17:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6369/12318 [11:01:35<10:17:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6370/12318 [11:01:43<10:17:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6370/12318 [11:01:43<10:17:52,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6371/12318 [11:01:48<10:17:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6371/12318 [11:01:48<10:17:46,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6372/12318 [11:01:54<10:17:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6372/12318 [11:01:54<10:17:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6373/12318 [11:01:57<10:17:29,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6373/12318 [11:01:57<10:17:29,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6374/12318 [11:02:02<10:17:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6374/12318 [11:02:02<10:17:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6375/12318 [11:02:05<10:17:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6375/12318 [11:02:05<10:17:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6376/12318 [11:02:06<10:17:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6376/12318 [11:02:06<10:17:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6377/12318 [11:02:14<10:16:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6377/12318 [11:02:14<10:16:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6378/12318 [11:02:16<10:16:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6378/12318 [11:02:16<10:16:47,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6379/12318 [11:02:20<10:16:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6379/12318 [11:02:20<10:16:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6380/12318 [11:02:25<10:16:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6380/12318 [11:02:25<10:16:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6381/12318 [11:02:32<10:16:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6381/12318 [11:02:32<10:16:26,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6382/12318 [11:02:38<10:16:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6382/12318 [11:02:38<10:16:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6383/12318 [11:02:44<10:16:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6383/12318 [11:02:44<10:16:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6384/12318 [11:02:53<10:16:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6384/12318 [11:02:53<10:16:09,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6385/12318 [11:03:02<10:16:06,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6385/12318 [11:03:02<10:16:06,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6386/12318 [11:03:04<10:15:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6386/12318 [11:03:04<10:15:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6387/12318 [11:03:10<10:15:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6387/12318 [11:03:10<10:15:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6388/12318 [11:03:16<10:15:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6388/12318 [11:03:16<10:15:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6389/12318 [11:03:20<10:15:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6389/12318 [11:03:20<10:15:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6390/12318 [11:03:25<10:15:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6390/12318 [11:03:25<10:15:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6391/12318 [11:03:33<10:15:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6391/12318 [11:03:33<10:15:22,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6392/12318 [11:03:38<10:15:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6392/12318 [11:03:38<10:15:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6393/12318 [11:03:40<10:15:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6393/12318 [11:03:40<10:15:05,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6394/12318 [11:03:46<10:14:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6394/12318 [11:03:46<10:14:59,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6395/12318 [11:03:53<10:14:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6395/12318 [11:03:53<10:14:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6396/12318 [11:04:02<10:14:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6396/12318 [11:04:02<10:14:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6397/12318 [11:04:06<10:14:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6397/12318 [11:04:06<10:14:41,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6398/12318 [11:04:14<10:14:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6398/12318 [11:04:14<10:14:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6399/12318 [11:04:23<10:14:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6399/12318 [11:04:23<10:14:33,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6400/12318 [11:04:40<10:14:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6400/12318 [11:04:40<10:14:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6401/12318 [11:05:10<10:14:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6401/12318 [11:05:10<10:14:52,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6402/12318 [11:05:13<10:14:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6402/12318 [11:05:13<10:14:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6403/12318 [11:05:17<10:14:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6403/12318 [11:05:17<10:14:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6404/12318 [11:05:19<10:14:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6404/12318 [11:05:19<10:14:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6405/12318 [11:05:26<10:14:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6405/12318 [11:05:26<10:14:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6406/12318 [11:05:34<10:14:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6406/12318 [11:05:34<10:14:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6407/12318 [11:05:37<10:14:06,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6407/12318 [11:05:37<10:14:06,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6408/12318 [11:05:43<10:13:59,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6408/12318 [11:05:43<10:13:59,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6409/12318 [11:05:50<10:13:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6409/12318 [11:05:50<10:13:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6410/12318 [11:05:57<10:13:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6410/12318 [11:05:57<10:13:47,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6411/12318 [11:06:02<10:13:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6411/12318 [11:06:02<10:13:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6412/12318 [11:06:09<10:13:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6412/12318 [11:06:09<10:13:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6413/12318 [11:06:12<10:13:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6413/12318 [11:06:12<10:13:26,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6414/12318 [11:06:18<10:13:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6414/12318 [11:06:18<10:13:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6415/12318 [11:06:26<10:13:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6415/12318 [11:06:26<10:13:14,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6416/12318 [11:06:32<10:13:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6416/12318 [11:06:32<10:13:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6417/12318 [11:06:40<10:13:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6417/12318 [11:06:40<10:13:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6418/12318 [11:06:45<10:12:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6418/12318 [11:06:45<10:12:56,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6419/12318 [11:06:50<10:12:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6419/12318 [11:06:50<10:12:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6420/12318 [11:06:53<10:12:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6420/12318 [11:06:53<10:12:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6421/12318 [11:06:56<10:12:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6421/12318 [11:06:56<10:12:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6422/12318 [11:06:57<10:12:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6422/12318 [11:06:57<10:12:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6423/12318 [11:06:59<10:12:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6423/12318 [11:06:59<10:12:09,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6424/12318 [11:07:02<10:12:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6424/12318 [11:07:02<10:12:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6425/12318 [11:07:07<10:11:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6425/12318 [11:07:07<10:11:52,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6426/12318 [11:07:08<10:11:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6426/12318 [11:07:08<10:11:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6427/12318 [11:07:13<10:11:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6427/12318 [11:07:13<10:11:34,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6428/12318 [11:07:15<10:11:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6428/12318 [11:07:15<10:11:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6429/12318 [11:07:17<10:11:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6429/12318 [11:07:17<10:11:14,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6430/12318 [11:07:25<10:11:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6430/12318 [11:07:25<10:11:09,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6431/12318 [11:07:32<10:11:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6431/12318 [11:07:32<10:11:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6432/12318 [11:08:25<10:11:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6432/12318 [11:08:25<10:11:41,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6433/12318 [11:08:34<10:11:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6433/12318 [11:08:34<10:11:37,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6434/12318 [11:08:43<10:11:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6434/12318 [11:08:43<10:11:33,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6435/12318 [11:08:45<10:11:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6435/12318 [11:08:45<10:11:23,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6436/12318 [11:08:51<10:11:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6436/12318 [11:08:51<10:11:16,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6437/12318 [11:08:53<10:11:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6437/12318 [11:08:53<10:11:07,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6438/12318 [11:09:02<10:11:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6438/12318 [11:09:02<10:11:03,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6439/12318 [11:09:11<10:10:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6439/12318 [11:09:11<10:10:59,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6440/12318 [11:09:12<10:10:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6440/12318 [11:09:12<10:10:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6441/12318 [11:09:15<10:10:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6441/12318 [11:09:15<10:10:39,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6442/12318 [11:09:20<10:10:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6442/12318 [11:09:20<10:10:31,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6443/12318 [11:09:26<10:10:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6443/12318 [11:09:26<10:10:25,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6444/12318 [11:09:30<10:10:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6444/12318 [11:09:30<10:10:17,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6445/12318 [11:09:39<10:10:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6445/12318 [11:09:39<10:10:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6446/12318 [11:09:46<10:10:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6446/12318 [11:09:46<10:10:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6447/12318 [11:09:54<10:10:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6447/12318 [11:09:54<10:10:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6448/12318 [11:09:58<10:09:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6448/12318 [11:09:58<10:09:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6449/12318 [11:10:04<10:09:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6449/12318 [11:10:04<10:09:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6450/12318 [11:10:12<10:09:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6450/12318 [11:10:12<10:09:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6451/12318 [11:10:21<10:09:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6451/12318 [11:10:21<10:09:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6452/12318 [11:10:26<10:09:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6452/12318 [11:10:26<10:09:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6453/12318 [11:10:30<10:09:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6453/12318 [11:10:30<10:09:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6454/12318 [11:10:34<10:09:16,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6454/12318 [11:10:34<10:09:16,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6455/12318 [11:10:38<10:09:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6455/12318 [11:10:38<10:09:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6456/12318 [11:10:43<10:09:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6456/12318 [11:10:43<10:09:01,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6457/12318 [11:10:49<10:08:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6457/12318 [11:10:49<10:08:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6458/12318 [11:10:57<10:08:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6458/12318 [11:10:57<10:08:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6459/12318 [11:11:03<10:08:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6459/12318 [11:11:03<10:08:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6460/12318 [11:11:09<10:08:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6460/12318 [11:11:09<10:08:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6461/12318 [11:11:13<10:08:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6461/12318 [11:11:13<10:08:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6462/12318 [11:11:16<10:08:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6462/12318 [11:11:16<10:08:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6463/12318 [11:11:21<10:08:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6463/12318 [11:11:21<10:08:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6464/12318 [11:11:38<10:08:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6464/12318 [11:11:38<10:08:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6465/12318 [11:11:44<10:08:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6465/12318 [11:11:44<10:08:09,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  52%|▌| 6466/12318 [11:11:52<10:08:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  52%|▌| 6466/12318 [11:11:52<10:08:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6467/12318 [11:11:55<10:07:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6467/12318 [11:11:55<10:07:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6468/12318 [11:12:03<10:07:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6468/12318 [11:12:03<10:07:50,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6469/12318 [11:12:05<10:07:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6469/12318 [11:12:05<10:07:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6470/12318 [11:12:13<10:07:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6470/12318 [11:12:13<10:07:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6471/12318 [11:12:18<10:07:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6471/12318 [11:12:18<10:07:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6472/12318 [11:12:26<10:07:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6472/12318 [11:12:26<10:07:23,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6473/12318 [11:12:31<10:07:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6473/12318 [11:12:31<10:07:17,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6474/12318 [11:12:37<10:07:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6474/12318 [11:12:37<10:07:09,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6475/12318 [11:12:41<10:07:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6475/12318 [11:12:41<10:07:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6476/12318 [11:12:45<10:06:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6476/12318 [11:12:45<10:06:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6477/12318 [11:12:48<10:06:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6477/12318 [11:12:48<10:06:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6478/12318 [11:12:52<10:06:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6478/12318 [11:12:52<10:06:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6479/12318 [11:12:56<10:06:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6479/12318 [11:12:56<10:06:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6480/12318 [11:13:00<10:06:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6480/12318 [11:13:00<10:06:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6481/12318 [11:13:05<10:06:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6481/12318 [11:13:05<10:06:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6482/12318 [11:13:14<10:06:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6482/12318 [11:13:14<10:06:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6483/12318 [11:13:21<10:06:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6483/12318 [11:13:21<10:06:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6484/12318 [11:13:23<10:05:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6484/12318 [11:13:23<10:05:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6485/12318 [11:13:25<10:05:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6485/12318 [11:13:25<10:05:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6486/12318 [11:13:31<10:05:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6486/12318 [11:13:31<10:05:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6487/12318 [11:13:34<10:05:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6487/12318 [11:13:34<10:05:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6488/12318 [11:13:39<10:05:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6488/12318 [11:13:39<10:05:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6489/12318 [11:13:42<10:05:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6489/12318 [11:13:42<10:05:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6490/12318 [11:13:47<10:05:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6490/12318 [11:13:47<10:05:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6491/12318 [11:13:53<10:04:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6491/12318 [11:13:53<10:04:57,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6492/12318 [11:14:01<10:04:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6492/12318 [11:14:01<10:04:52,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6493/12318 [11:14:09<10:04:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6493/12318 [11:14:09<10:04:48,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6494/12318 [11:14:11<10:04:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6494/12318 [11:14:11<10:04:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6495/12318 [11:14:18<10:04:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6495/12318 [11:14:18<10:04:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6496/12318 [11:14:56<10:04:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6496/12318 [11:14:56<10:04:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6497/12318 [11:14:58<10:04:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6497/12318 [11:14:58<10:04:44,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6498/12318 [11:15:02<10:04:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6498/12318 [11:15:02<10:04:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6499/12318 [11:15:10<10:04:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6499/12318 [11:15:10<10:04:31,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6500/12318 [11:15:14<10:04:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6500/12318 [11:15:14<10:04:23,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6501/12318 [11:15:19<10:04:16,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6501/12318 [11:15:19<10:04:16,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6502/12318 [11:15:28<10:04:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6502/12318 [11:15:28<10:04:12,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6503/12318 [11:15:31<10:04:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6503/12318 [11:15:31<10:04:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6504/12318 [11:15:39<10:03:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6504/12318 [11:15:39<10:03:58,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6505/12318 [11:15:48<10:03:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6505/12318 [11:15:48<10:03:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6506/12318 [11:15:55<10:03:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6506/12318 [11:15:55<10:03:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6507/12318 [11:16:02<10:03:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6507/12318 [11:16:02<10:03:43,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6508/12318 [11:16:09<10:03:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6508/12318 [11:16:09<10:03:37,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6509/12318 [11:16:13<10:03:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6509/12318 [11:16:13<10:03:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6510/12318 [11:16:20<10:03:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6510/12318 [11:16:20<10:03:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6511/12318 [11:16:29<10:03:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6511/12318 [11:16:29<10:03:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6512/12318 [11:16:32<10:03:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6512/12318 [11:16:32<10:03:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6513/12318 [11:16:35<10:03:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6513/12318 [11:16:35<10:03:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6514/12318 [11:16:39<10:02:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6514/12318 [11:16:39<10:02:54,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6515/12318 [11:16:42<10:02:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6515/12318 [11:16:42<10:02:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6516/12318 [11:16:50<10:02:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6516/12318 [11:16:50<10:02:40,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6517/12318 [11:16:58<10:02:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6517/12318 [11:16:58<10:02:35,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6518/12318 [11:17:03<10:02:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6518/12318 [11:17:03<10:02:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6519/12318 [11:17:06<10:02:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6519/12318 [11:17:06<10:02:19,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6520/12318 [11:17:15<10:02:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6520/12318 [11:17:15<10:02:15,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6521/12318 [11:17:20<10:02:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6521/12318 [11:17:20<10:02:08,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6522/12318 [11:17:27<10:02:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6522/12318 [11:17:27<10:02:03,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6523/12318 [11:17:30<10:01:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6523/12318 [11:17:30<10:01:53,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6524/12318 [11:17:39<10:01:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6524/12318 [11:17:39<10:01:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6525/12318 [11:17:43<10:01:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6525/12318 [11:17:43<10:01:42,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6526/12318 [11:17:50<10:01:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6526/12318 [11:17:50<10:01:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6527/12318 [11:17:53<10:01:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6527/12318 [11:17:53<10:01:27,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6528/12318 [11:18:13<10:01:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6528/12318 [11:18:13<10:01:32,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6529/12318 [11:18:21<10:01:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6529/12318 [11:18:21<10:01:28,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6530/12318 [11:18:23<10:01:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6530/12318 [11:18:23<10:01:18,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6531/12318 [11:18:31<10:01:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6531/12318 [11:18:31<10:01:13,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6532/12318 [11:18:34<10:01:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6532/12318 [11:18:34<10:01:04,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6533/12318 [11:18:43<10:01:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6533/12318 [11:18:43<10:01:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6534/12318 [11:18:51<10:00:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6534/12318 [11:18:51<10:00:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6535/12318 [11:18:52<10:00:45,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6535/12318 [11:18:52<10:00:45,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6536/12318 [11:18:58<10:00:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6536/12318 [11:18:58<10:00:38,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6537/12318 [11:19:02<10:00:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6537/12318 [11:19:02<10:00:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6538/12318 [11:19:04<10:00:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6538/12318 [11:19:04<10:00:20,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6539/12318 [11:19:07<10:00:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6539/12318 [11:19:07<10:00:11,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6540/12318 [11:19:08<10:00:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  53%|▌| 6540/12318 [11:19:08<10:00:00,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6541/12318 [11:19:10<9:59:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6541/12318 [11:19:10<9:59:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6542/12318 [11:19:18<9:59:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6542/12318 [11:19:18<9:59:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6543/12318 [11:19:24<9:59:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6543/12318 [11:19:24<9:59:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6544/12318 [11:19:30<9:59:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6544/12318 [11:19:30<9:59:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6545/12318 [11:19:36<9:59:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6545/12318 [11:19:36<9:59:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6546/12318 [11:19:43<9:59:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6546/12318 [11:19:43<9:59:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6547/12318 [11:19:48<9:59:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6547/12318 [11:19:48<9:59:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6548/12318 [11:19:54<9:59:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6548/12318 [11:19:54<9:59:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6549/12318 [11:20:00<9:59:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6549/12318 [11:20:00<9:59:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6550/12318 [11:20:04<9:58:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6550/12318 [11:20:04<9:58:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6551/12318 [11:20:06<9:58:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6551/12318 [11:20:06<9:58:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6552/12318 [11:20:13<9:58:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6552/12318 [11:20:13<9:58:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6553/12318 [11:20:20<9:58:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6553/12318 [11:20:20<9:58:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6554/12318 [11:20:25<9:58:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6554/12318 [11:20:25<9:58:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6555/12318 [11:20:32<9:58:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6555/12318 [11:20:32<9:58:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6556/12318 [11:20:37<9:58:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6556/12318 [11:20:37<9:58:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6557/12318 [11:20:43<9:58:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6557/12318 [11:20:43<9:58:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6558/12318 [11:20:49<9:57:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6558/12318 [11:20:49<9:57:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6559/12318 [11:20:56<9:57:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6559/12318 [11:20:56<9:57:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6560/12318 [11:21:24<9:58:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6560/12318 [11:21:24<9:58:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6561/12318 [11:21:25<9:57:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6561/12318 [11:21:25<9:57:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6562/12318 [11:21:29<9:57:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6562/12318 [11:21:29<9:57:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6563/12318 [11:21:35<9:57:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6563/12318 [11:21:35<9:57:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6564/12318 [11:21:41<9:57:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6564/12318 [11:21:41<9:57:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6565/12318 [11:21:45<9:57:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6565/12318 [11:21:45<9:57:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6566/12318 [11:21:48<9:57:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6566/12318 [11:21:48<9:57:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6567/12318 [11:21:56<9:57:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6567/12318 [11:21:56<9:57:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6568/12318 [11:22:03<9:57:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6568/12318 [11:22:03<9:57:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6569/12318 [11:22:11<9:57:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6569/12318 [11:22:11<9:57:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6570/12318 [11:22:16<9:56:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6570/12318 [11:22:16<9:56:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6571/12318 [11:22:17<9:56:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6571/12318 [11:22:17<9:56:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6572/12318 [11:22:22<9:56:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6572/12318 [11:22:22<9:56:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6573/12318 [11:22:28<9:56:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6573/12318 [11:22:28<9:56:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6574/12318 [11:22:33<9:56:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6574/12318 [11:22:33<9:56:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6575/12318 [11:22:39<9:56:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6575/12318 [11:22:39<9:56:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6576/12318 [11:22:46<9:56:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6576/12318 [11:22:46<9:56:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6577/12318 [11:22:52<9:56:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6577/12318 [11:22:52<9:56:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6578/12318 [11:22:59<9:55:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6578/12318 [11:22:59<9:55:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6579/12318 [11:23:01<9:55:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6579/12318 [11:23:01<9:55:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6580/12318 [11:23:04<9:55:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6580/12318 [11:23:04<9:55:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6581/12318 [11:23:11<9:55:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6581/12318 [11:23:11<9:55:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6582/12318 [11:23:16<9:55:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6582/12318 [11:23:16<9:55:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6583/12318 [11:23:23<9:55:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6583/12318 [11:23:23<9:55:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6584/12318 [11:23:31<9:55:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6584/12318 [11:23:31<9:55:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6585/12318 [11:23:36<9:55:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6585/12318 [11:23:36<9:55:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6586/12318 [11:23:44<9:55:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6586/12318 [11:23:44<9:55:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6587/12318 [11:23:53<9:55:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6587/12318 [11:23:53<9:55:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6588/12318 [11:23:57<9:54:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6588/12318 [11:23:57<9:54:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6589/12318 [11:24:00<9:54:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6589/12318 [11:24:00<9:54:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  53%|▌| 6590/12318 [11:24:08<9:54:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  53%|▌| 6590/12318 [11:24:08<9:54:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6591/12318 [11:24:14<9:54:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6591/12318 [11:24:14<9:54:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6592/12318 [11:24:48<9:54:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6592/12318 [11:24:48<9:54:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6593/12318 [11:24:52<9:54:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6593/12318 [11:24:52<9:54:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6594/12318 [11:25:01<9:54:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6594/12318 [11:25:01<9:54:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6595/12318 [11:25:05<9:54:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6595/12318 [11:25:05<9:54:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6596/12318 [11:25:07<9:54:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6596/12318 [11:25:07<9:54:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6597/12318 [11:25:16<9:54:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6597/12318 [11:25:16<9:54:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6598/12318 [11:25:19<9:54:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6598/12318 [11:25:19<9:54:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6599/12318 [11:25:22<9:53:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6599/12318 [11:25:22<9:53:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6600/12318 [11:25:28<9:53:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6600/12318 [11:25:28<9:53:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6601/12318 [11:25:34<9:53:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6601/12318 [11:25:34<9:53:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6602/12318 [11:25:39<9:53:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6602/12318 [11:25:39<9:53:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6603/12318 [11:25:47<9:53:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6603/12318 [11:25:47<9:53:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6604/12318 [11:25:49<9:53:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6604/12318 [11:25:49<9:53:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6605/12318 [11:25:55<9:53:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6605/12318 [11:25:55<9:53:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6606/12318 [11:26:02<9:53:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6606/12318 [11:26:02<9:53:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6607/12318 [11:26:09<9:53:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6607/12318 [11:26:09<9:53:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6608/12318 [11:26:13<9:52:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6608/12318 [11:26:13<9:52:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6609/12318 [11:26:15<9:52:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6609/12318 [11:26:15<9:52:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6610/12318 [11:26:18<9:52:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6610/12318 [11:26:18<9:52:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6611/12318 [11:26:27<9:52:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6611/12318 [11:26:27<9:52:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6612/12318 [11:26:29<9:52:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6612/12318 [11:26:29<9:52:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6613/12318 [11:26:34<9:52:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6613/12318 [11:26:34<9:52:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6614/12318 [11:26:39<9:52:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6614/12318 [11:26:39<9:52:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6615/12318 [11:26:47<9:52:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6615/12318 [11:26:47<9:52:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6616/12318 [11:26:49<9:51:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6616/12318 [11:26:49<9:51:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6617/12318 [11:26:51<9:51:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6617/12318 [11:26:51<9:51:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6618/12318 [11:26:59<9:51:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6618/12318 [11:26:59<9:51:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6619/12318 [11:27:08<9:51:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6619/12318 [11:27:08<9:51:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6620/12318 [11:27:11<9:51:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6620/12318 [11:27:11<9:51:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6621/12318 [11:27:14<9:51:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6621/12318 [11:27:14<9:51:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6622/12318 [11:27:18<9:51:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6622/12318 [11:27:18<9:51:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6623/12318 [11:27:23<9:51:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6623/12318 [11:27:23<9:51:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6624/12318 [11:27:55<9:51:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6624/12318 [11:27:55<9:51:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6625/12318 [11:28:02<9:51:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6625/12318 [11:28:02<9:51:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6626/12318 [11:28:07<9:51:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6626/12318 [11:28:07<9:51:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6627/12318 [11:28:09<9:50:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6627/12318 [11:28:09<9:50:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6628/12318 [11:28:18<9:50:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6628/12318 [11:28:18<9:50:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6629/12318 [11:28:23<9:50:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6629/12318 [11:28:23<9:50:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6630/12318 [11:28:29<9:50:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6630/12318 [11:28:29<9:50:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6631/12318 [11:28:36<9:50:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6631/12318 [11:28:36<9:50:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6632/12318 [11:28:43<9:50:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6632/12318 [11:28:43<9:50:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6633/12318 [11:28:46<9:50:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6633/12318 [11:28:46<9:50:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6634/12318 [11:28:51<9:50:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6634/12318 [11:28:51<9:50:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6635/12318 [11:28:54<9:50:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6635/12318 [11:28:54<9:50:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6636/12318 [11:29:02<9:49:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6636/12318 [11:29:02<9:49:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6637/12318 [11:29:05<9:49:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6637/12318 [11:29:05<9:49:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6638/12318 [11:29:06<9:49:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6638/12318 [11:29:06<9:49:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6639/12318 [11:29:12<9:49:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6639/12318 [11:29:12<9:49:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6640/12318 [11:29:15<9:49:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6640/12318 [11:29:15<9:49:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6641/12318 [11:29:23<9:49:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6641/12318 [11:29:23<9:49:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6642/12318 [11:29:31<9:49:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6642/12318 [11:29:31<9:49:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6643/12318 [11:29:40<9:49:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6643/12318 [11:29:40<9:49:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6644/12318 [11:29:47<9:49:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6644/12318 [11:29:47<9:49:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6645/12318 [11:29:51<9:48:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6645/12318 [11:29:51<9:48:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6646/12318 [11:30:00<9:48:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6646/12318 [11:30:00<9:48:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6647/12318 [11:30:02<9:48:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6647/12318 [11:30:02<9:48:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6648/12318 [11:30:05<9:48:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6648/12318 [11:30:05<9:48:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6649/12318 [11:30:09<9:48:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6649/12318 [11:30:09<9:48:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6650/12318 [11:30:17<9:48:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6650/12318 [11:30:17<9:48:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6651/12318 [11:30:22<9:48:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6651/12318 [11:30:22<9:48:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6652/12318 [11:30:23<9:48:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6652/12318 [11:30:23<9:48:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6653/12318 [11:30:30<9:47:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6653/12318 [11:30:30<9:47:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6654/12318 [11:30:32<9:47:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6654/12318 [11:30:32<9:47:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6655/12318 [11:30:40<9:47:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6655/12318 [11:30:40<9:47:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6656/12318 [11:31:19<9:48:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6656/12318 [11:31:19<9:48:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6657/12318 [11:31:23<9:47:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6657/12318 [11:31:23<9:47:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6658/12318 [11:31:29<9:47:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6658/12318 [11:31:29<9:47:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6659/12318 [11:31:32<9:47:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6659/12318 [11:31:32<9:47:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6660/12318 [11:31:37<9:47:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6660/12318 [11:31:37<9:47:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6661/12318 [11:31:41<9:47:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6661/12318 [11:31:41<9:47:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6662/12318 [11:31:43<9:47:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6662/12318 [11:31:43<9:47:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6663/12318 [11:31:48<9:47:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6663/12318 [11:31:48<9:47:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6664/12318 [11:31:52<9:47:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6664/12318 [11:31:52<9:47:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6665/12318 [11:31:56<9:46:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6665/12318 [11:31:56<9:46:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6666/12318 [11:32:03<9:46:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6666/12318 [11:32:03<9:46:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6667/12318 [11:32:11<9:46:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6667/12318 [11:32:11<9:46:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6668/12318 [11:32:16<9:46:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6668/12318 [11:32:16<9:46:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6669/12318 [11:32:18<9:46:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6669/12318 [11:32:18<9:46:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6670/12318 [11:32:20<9:46:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6670/12318 [11:32:20<9:46:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6671/12318 [11:32:24<9:46:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6671/12318 [11:32:24<9:46:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6672/12318 [11:32:25<9:45:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6672/12318 [11:32:25<9:45:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6673/12318 [11:32:32<9:45:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6673/12318 [11:32:32<9:45:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6674/12318 [11:32:40<9:45:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6674/12318 [11:32:40<9:45:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6675/12318 [11:32:42<9:45:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6675/12318 [11:32:42<9:45:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6676/12318 [11:32:45<9:45:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6676/12318 [11:32:45<9:45:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6677/12318 [11:32:50<9:45:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6677/12318 [11:32:50<9:45:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6678/12318 [11:32:54<9:45:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6678/12318 [11:32:54<9:45:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6679/12318 [11:32:57<9:45:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6679/12318 [11:32:57<9:45:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6680/12318 [11:33:05<9:44:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6680/12318 [11:33:05<9:44:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6681/12318 [11:33:09<9:44:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6681/12318 [11:33:09<9:44:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6682/12318 [11:33:14<9:44:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6682/12318 [11:33:14<9:44:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6683/12318 [11:33:19<9:44:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6683/12318 [11:33:19<9:44:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6684/12318 [11:33:27<9:44:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6684/12318 [11:33:27<9:44:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6685/12318 [11:33:32<9:44:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6685/12318 [11:33:32<9:44:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6686/12318 [11:33:35<9:44:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6686/12318 [11:33:35<9:44:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6687/12318 [11:33:41<9:44:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6687/12318 [11:33:41<9:44:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6688/12318 [11:34:38<9:44:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6688/12318 [11:34:38<9:44:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6689/12318 [11:34:43<9:44:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6689/12318 [11:34:43<9:44:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6690/12318 [11:34:50<9:44:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6690/12318 [11:34:50<9:44:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6691/12318 [11:34:55<9:44:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6691/12318 [11:34:55<9:44:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6692/12318 [11:35:00<9:44:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6692/12318 [11:35:00<9:44:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6693/12318 [11:35:08<9:44:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6693/12318 [11:35:08<9:44:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6694/12318 [11:35:09<9:44:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6694/12318 [11:35:09<9:44:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6695/12318 [11:35:17<9:43:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6695/12318 [11:35:17<9:43:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6696/12318 [11:35:25<9:43:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6696/12318 [11:35:25<9:43:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6697/12318 [11:35:28<9:43:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6697/12318 [11:35:28<9:43:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6698/12318 [11:35:35<9:43:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6698/12318 [11:35:35<9:43:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6699/12318 [11:35:41<9:43:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6699/12318 [11:35:41<9:43:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6700/12318 [11:35:49<9:43:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6700/12318 [11:35:49<9:43:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6701/12318 [11:35:54<9:43:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6701/12318 [11:35:54<9:43:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6702/12318 [11:36:03<9:43:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6702/12318 [11:36:03<9:43:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6703/12318 [11:36:07<9:43:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6703/12318 [11:36:07<9:43:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6704/12318 [11:36:13<9:43:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6704/12318 [11:36:13<9:43:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6705/12318 [11:36:14<9:42:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6705/12318 [11:36:14<9:42:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6706/12318 [11:36:19<9:42:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6706/12318 [11:36:19<9:42:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6707/12318 [11:36:20<9:42:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6707/12318 [11:36:20<9:42:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6708/12318 [11:36:29<9:42:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6708/12318 [11:36:29<9:42:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6709/12318 [11:36:34<9:42:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6709/12318 [11:36:34<9:42:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6710/12318 [11:36:39<9:42:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6710/12318 [11:36:39<9:42:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6711/12318 [11:36:45<9:42:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6711/12318 [11:36:45<9:42:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6712/12318 [11:36:49<9:42:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6712/12318 [11:36:49<9:42:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  54%|▌| 6713/12318 [11:36:58<9:41:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  54%|▌| 6713/12318 [11:36:58<9:41:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6714/12318 [11:37:05<9:41:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6714/12318 [11:37:05<9:41:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6715/12318 [11:37:14<9:41:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6715/12318 [11:37:14<9:41:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6716/12318 [11:37:20<9:41:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6716/12318 [11:37:20<9:41:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6717/12318 [11:37:28<9:41:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6717/12318 [11:37:28<9:41:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6718/12318 [11:37:34<9:41:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6718/12318 [11:37:34<9:41:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6719/12318 [11:37:38<9:41:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6719/12318 [11:37:38<9:41:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6720/12318 [11:37:55<9:41:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6720/12318 [11:37:55<9:41:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6721/12318 [11:38:00<9:41:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6721/12318 [11:38:00<9:41:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6722/12318 [11:38:04<9:41:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6722/12318 [11:38:04<9:41:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6723/12318 [11:38:11<9:41:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6723/12318 [11:38:11<9:41:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6724/12318 [11:38:13<9:40:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6724/12318 [11:38:13<9:40:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6725/12318 [11:38:14<9:40:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6725/12318 [11:38:14<9:40:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6726/12318 [11:38:21<9:40:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6726/12318 [11:38:21<9:40:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6727/12318 [11:38:30<9:40:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6727/12318 [11:38:30<9:40:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6728/12318 [11:38:37<9:40:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6728/12318 [11:38:37<9:40:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6729/12318 [11:38:45<9:40:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6729/12318 [11:38:45<9:40:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6730/12318 [11:38:52<9:40:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6730/12318 [11:38:52<9:40:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6731/12318 [11:38:58<9:40:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6731/12318 [11:38:58<9:40:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6732/12318 [11:39:07<9:40:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6732/12318 [11:39:07<9:40:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6733/12318 [11:39:12<9:39:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6733/12318 [11:39:12<9:39:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6734/12318 [11:39:15<9:39:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6734/12318 [11:39:15<9:39:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6735/12318 [11:39:22<9:39:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6735/12318 [11:39:22<9:39:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6736/12318 [11:39:27<9:39:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6736/12318 [11:39:27<9:39:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6737/12318 [11:39:36<9:39:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6737/12318 [11:39:36<9:39:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6738/12318 [11:39:41<9:39:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6738/12318 [11:39:41<9:39:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6739/12318 [11:39:47<9:39:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6739/12318 [11:39:47<9:39:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6740/12318 [11:39:52<9:39:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6740/12318 [11:39:52<9:39:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6741/12318 [11:39:58<9:39:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6741/12318 [11:39:58<9:39:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6742/12318 [11:40:00<9:38:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6742/12318 [11:40:00<9:38:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6743/12318 [11:40:08<9:38:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6743/12318 [11:40:08<9:38:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6744/12318 [11:40:12<9:38:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6744/12318 [11:40:12<9:38:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6745/12318 [11:40:19<9:38:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6745/12318 [11:40:19<9:38:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6746/12318 [11:40:23<9:38:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6746/12318 [11:40:23<9:38:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6747/12318 [11:40:29<9:38:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6747/12318 [11:40:29<9:38:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6748/12318 [11:40:38<9:38:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6748/12318 [11:40:38<9:38:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6749/12318 [11:40:39<9:38:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6749/12318 [11:40:39<9:38:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6750/12318 [11:40:47<9:38:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6750/12318 [11:40:47<9:38:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6751/12318 [11:40:52<9:37:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6751/12318 [11:40:52<9:37:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6752/12318 [11:41:08<9:37:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6752/12318 [11:41:08<9:37:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6753/12318 [11:41:15<9:37:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6753/12318 [11:41:15<9:37:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6754/12318 [11:41:18<9:37:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6754/12318 [11:41:18<9:37:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6755/12318 [11:41:20<9:37:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6755/12318 [11:41:20<9:37:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6756/12318 [11:41:26<9:37:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6756/12318 [11:41:26<9:37:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6757/12318 [11:41:28<9:37:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6757/12318 [11:41:28<9:37:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6758/12318 [11:41:29<9:37:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6758/12318 [11:41:29<9:37:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6759/12318 [11:41:36<9:37:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6759/12318 [11:41:36<9:37:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6760/12318 [11:41:40<9:36:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6760/12318 [11:41:40<9:36:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6761/12318 [11:41:43<9:36:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6761/12318 [11:41:43<9:36:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6762/12318 [11:41:46<9:36:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6762/12318 [11:41:46<9:36:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6763/12318 [11:41:52<9:36:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6763/12318 [11:41:52<9:36:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6764/12318 [11:41:57<9:36:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6764/12318 [11:41:57<9:36:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6765/12318 [11:42:02<9:36:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6765/12318 [11:42:02<9:36:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6766/12318 [11:42:05<9:36:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6766/12318 [11:42:05<9:36:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6767/12318 [11:42:12<9:36:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6767/12318 [11:42:12<9:36:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6768/12318 [11:42:21<9:35:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6768/12318 [11:42:21<9:35:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6769/12318 [11:42:24<9:35:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6769/12318 [11:42:24<9:35:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6770/12318 [11:42:32<9:35:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6770/12318 [11:42:32<9:35:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6771/12318 [11:42:39<9:35:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6771/12318 [11:42:39<9:35:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6772/12318 [11:42:41<9:35:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6772/12318 [11:42:41<9:35:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6773/12318 [11:42:43<9:35:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6773/12318 [11:42:43<9:35:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6774/12318 [11:42:47<9:35:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6774/12318 [11:42:47<9:35:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6775/12318 [11:42:55<9:35:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6775/12318 [11:42:55<9:35:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6776/12318 [11:43:03<9:35:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6776/12318 [11:43:03<9:35:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6777/12318 [11:43:06<9:34:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6777/12318 [11:43:06<9:34:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6778/12318 [11:43:14<9:34:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6778/12318 [11:43:14<9:34:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6779/12318 [11:43:22<9:34:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6779/12318 [11:43:22<9:34:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6780/12318 [11:43:30<9:34:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6780/12318 [11:43:30<9:34:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6781/12318 [11:43:32<9:34:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6781/12318 [11:43:32<9:34:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6782/12318 [11:43:39<9:34:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6782/12318 [11:43:39<9:34:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6783/12318 [11:43:43<9:34:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6783/12318 [11:43:43<9:34:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6784/12318 [11:44:13<9:34:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6784/12318 [11:44:13<9:34:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6785/12318 [11:44:20<9:34:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6785/12318 [11:44:20<9:34:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6786/12318 [11:44:25<9:34:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6786/12318 [11:44:25<9:34:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6787/12318 [11:44:30<9:34:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6787/12318 [11:44:30<9:34:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6788/12318 [11:44:34<9:33:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6788/12318 [11:44:34<9:33:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6789/12318 [11:44:35<9:33:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6789/12318 [11:44:35<9:33:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6790/12318 [11:44:40<9:33:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6790/12318 [11:44:40<9:33:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6791/12318 [11:44:45<9:33:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6791/12318 [11:44:45<9:33:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6792/12318 [11:44:51<9:33:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6792/12318 [11:44:51<9:33:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6793/12318 [11:44:56<9:33:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6793/12318 [11:44:56<9:33:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6794/12318 [11:45:01<9:33:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6794/12318 [11:45:01<9:33:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6795/12318 [11:45:09<9:33:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6795/12318 [11:45:09<9:33:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6796/12318 [11:45:11<9:32:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6796/12318 [11:45:11<9:32:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6797/12318 [11:45:19<9:32:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6797/12318 [11:45:19<9:32:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6798/12318 [11:45:24<9:32:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6798/12318 [11:45:24<9:32:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6799/12318 [11:45:29<9:32:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6799/12318 [11:45:29<9:32:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6800/12318 [11:45:35<9:32:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6800/12318 [11:45:35<9:32:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6801/12318 [11:45:42<9:32:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6801/12318 [11:45:42<9:32:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6802/12318 [11:45:49<9:32:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6802/12318 [11:45:49<9:32:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6803/12318 [11:45:52<9:32:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6803/12318 [11:45:52<9:32:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6804/12318 [11:46:00<9:32:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6804/12318 [11:46:00<9:32:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6805/12318 [11:46:08<9:32:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6805/12318 [11:46:08<9:32:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6806/12318 [11:46:11<9:31:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6806/12318 [11:46:11<9:31:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6807/12318 [11:46:20<9:31:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6807/12318 [11:46:20<9:31:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6808/12318 [11:46:28<9:31:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6808/12318 [11:46:28<9:31:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6809/12318 [11:46:32<9:31:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6809/12318 [11:46:32<9:31:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6810/12318 [11:46:39<9:31:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6810/12318 [11:46:39<9:31:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6811/12318 [11:46:43<9:31:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6811/12318 [11:46:43<9:31:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6812/12318 [11:46:49<9:31:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6812/12318 [11:46:49<9:31:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6813/12318 [11:46:54<9:31:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6813/12318 [11:46:54<9:31:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6814/12318 [11:47:03<9:31:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6814/12318 [11:47:03<9:31:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6815/12318 [11:47:12<9:31:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6815/12318 [11:47:12<9:31:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6816/12318 [11:47:27<9:31:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6816/12318 [11:47:27<9:31:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6817/12318 [11:47:34<9:30:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6817/12318 [11:47:34<9:30:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6818/12318 [11:47:36<9:30:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6818/12318 [11:47:36<9:30:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6819/12318 [11:47:43<9:30:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6819/12318 [11:47:43<9:30:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6820/12318 [11:47:52<9:30:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6820/12318 [11:47:52<9:30:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6821/12318 [11:48:00<9:30:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6821/12318 [11:48:00<9:30:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6822/12318 [11:48:07<9:30:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6822/12318 [11:48:07<9:30:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6823/12318 [11:48:08<9:30:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6823/12318 [11:48:08<9:30:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6824/12318 [11:48:14<9:30:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6824/12318 [11:48:14<9:30:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6825/12318 [11:48:19<9:30:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6825/12318 [11:48:19<9:30:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6826/12318 [11:48:23<9:29:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6826/12318 [11:48:23<9:29:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6827/12318 [11:48:31<9:29:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6827/12318 [11:48:31<9:29:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6828/12318 [11:48:35<9:29:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6828/12318 [11:48:35<9:29:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6829/12318 [11:48:39<9:29:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6829/12318 [11:48:39<9:29:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6830/12318 [11:48:46<9:29:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6830/12318 [11:48:46<9:29:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6831/12318 [11:48:52<9:29:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6831/12318 [11:48:52<9:29:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6832/12318 [11:48:57<9:29:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6832/12318 [11:48:57<9:29:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6833/12318 [11:49:05<9:29:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6833/12318 [11:49:05<9:29:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6834/12318 [11:49:11<9:29:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6834/12318 [11:49:11<9:29:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6835/12318 [11:49:20<9:29:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  55%|▌| 6835/12318 [11:49:20<9:29:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6836/12318 [11:49:24<9:28:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  55%|▌| 6836/12318 [11:49:24<9:28:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6837/12318 [11:49:28<9:28:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6837/12318 [11:49:28<9:28:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6838/12318 [11:49:36<9:28:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6838/12318 [11:49:36<9:28:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6839/12318 [11:49:44<9:28:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6839/12318 [11:49:44<9:28:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6840/12318 [11:49:50<9:28:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6840/12318 [11:49:50<9:28:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6841/12318 [11:49:54<9:28:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6841/12318 [11:49:54<9:28:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6842/12318 [11:50:03<9:28:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6842/12318 [11:50:03<9:28:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6843/12318 [11:50:12<9:28:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6843/12318 [11:50:12<9:28:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6844/12318 [11:50:15<9:28:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6844/12318 [11:50:15<9:28:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6845/12318 [11:50:22<9:27:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6845/12318 [11:50:22<9:27:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6846/12318 [11:50:27<9:27:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6846/12318 [11:50:27<9:27:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6847/12318 [11:50:31<9:27:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6847/12318 [11:50:31<9:27:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6848/12318 [11:50:47<9:27:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6848/12318 [11:50:47<9:27:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6849/12318 [11:50:56<9:27:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6849/12318 [11:50:56<9:27:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6850/12318 [11:51:02<9:27:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6850/12318 [11:51:02<9:27:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6851/12318 [11:51:07<9:27:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6851/12318 [11:51:07<9:27:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6852/12318 [11:51:16<9:27:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6852/12318 [11:51:16<9:27:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6853/12318 [11:51:22<9:27:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6853/12318 [11:51:22<9:27:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6854/12318 [11:51:27<9:27:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6854/12318 [11:51:27<9:27:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6855/12318 [11:51:32<9:27:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6855/12318 [11:51:32<9:27:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6856/12318 [11:51:33<9:26:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6856/12318 [11:51:33<9:26:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6857/12318 [11:51:38<9:26:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6857/12318 [11:51:38<9:26:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6858/12318 [11:51:46<9:26:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6858/12318 [11:51:46<9:26:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6859/12318 [11:51:51<9:26:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6859/12318 [11:51:51<9:26:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6860/12318 [11:51:52<9:26:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6860/12318 [11:51:52<9:26:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6861/12318 [11:51:58<9:26:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6861/12318 [11:51:58<9:26:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6862/12318 [11:52:01<9:26:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6862/12318 [11:52:01<9:26:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6863/12318 [11:52:09<9:26:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6863/12318 [11:52:09<9:26:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6864/12318 [11:52:15<9:25:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6864/12318 [11:52:15<9:25:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6865/12318 [11:52:16<9:25:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6865/12318 [11:52:16<9:25:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6866/12318 [11:52:24<9:25:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6866/12318 [11:52:24<9:25:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6867/12318 [11:52:28<9:25:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6867/12318 [11:52:28<9:25:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6868/12318 [11:52:33<9:25:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6868/12318 [11:52:33<9:25:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6869/12318 [11:52:38<9:25:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6869/12318 [11:52:38<9:25:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6870/12318 [11:52:42<9:25:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6870/12318 [11:52:42<9:25:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6871/12318 [11:52:45<9:25:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6871/12318 [11:52:45<9:25:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6872/12318 [11:52:49<9:24:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6872/12318 [11:52:49<9:24:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6873/12318 [11:52:56<9:24:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6873/12318 [11:52:56<9:24:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6874/12318 [11:53:01<9:24:41,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6874/12318 [11:53:01<9:24:41,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6875/12318 [11:53:07<9:24:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6875/12318 [11:53:07<9:24:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6876/12318 [11:53:16<9:24:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6876/12318 [11:53:16<9:24:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6877/12318 [11:53:21<9:24:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6877/12318 [11:53:21<9:24:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6878/12318 [11:53:23<9:24:14,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6878/12318 [11:53:23<9:24:14,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6879/12318 [11:53:28<9:24:07,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6879/12318 [11:53:28<9:24:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6880/12318 [11:53:56<9:24:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6880/12318 [11:53:56<9:24:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6881/12318 [11:53:57<9:24:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6881/12318 [11:53:57<9:24:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6882/12318 [11:54:04<9:24:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6882/12318 [11:54:04<9:24:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6883/12318 [11:54:12<9:23:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6883/12318 [11:54:12<9:23:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6884/12318 [11:54:20<9:23:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6884/12318 [11:54:20<9:23:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6885/12318 [11:54:27<9:23:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6885/12318 [11:54:27<9:23:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6886/12318 [11:54:32<9:23:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6886/12318 [11:54:32<9:23:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6887/12318 [11:54:34<9:23:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6887/12318 [11:54:34<9:23:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6888/12318 [11:54:42<9:23:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6888/12318 [11:54:42<9:23:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6889/12318 [11:54:51<9:23:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6889/12318 [11:54:51<9:23:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6890/12318 [11:54:53<9:23:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6890/12318 [11:54:53<9:23:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6891/12318 [11:55:01<9:23:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6891/12318 [11:55:01<9:23:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6892/12318 [11:55:08<9:23:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6892/12318 [11:55:08<9:23:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6893/12318 [11:55:16<9:22:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6893/12318 [11:55:16<9:22:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6894/12318 [11:55:21<9:22:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6894/12318 [11:55:21<9:22:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6895/12318 [11:55:29<9:22:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6895/12318 [11:55:29<9:22:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6896/12318 [11:55:34<9:22:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6896/12318 [11:55:34<9:22:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6897/12318 [11:55:40<9:22:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6897/12318 [11:55:40<9:22:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6898/12318 [11:55:48<9:22:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6898/12318 [11:55:48<9:22:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6899/12318 [11:55:54<9:22:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6899/12318 [11:55:54<9:22:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6900/12318 [11:55:59<9:22:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6900/12318 [11:55:59<9:22:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6901/12318 [11:56:03<9:22:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6901/12318 [11:56:03<9:22:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6902/12318 [11:56:10<9:21:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6902/12318 [11:56:10<9:21:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6903/12318 [11:56:18<9:21:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6903/12318 [11:56:18<9:21:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6904/12318 [11:56:21<9:21:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6904/12318 [11:56:21<9:21:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6905/12318 [11:56:27<9:21:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6905/12318 [11:56:27<9:21:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6906/12318 [11:56:33<9:21:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6906/12318 [11:56:33<9:21:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6907/12318 [11:56:39<9:21:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6907/12318 [11:56:39<9:21:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6908/12318 [11:56:43<9:21:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6908/12318 [11:56:43<9:21:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6909/12318 [11:56:46<9:21:09,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6909/12318 [11:56:46<9:21:09,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6910/12318 [11:56:49<9:21:00,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6910/12318 [11:56:49<9:21:00,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6911/12318 [11:56:54<9:20:53,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6911/12318 [11:56:54<9:20:53,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6912/12318 [11:57:14<9:20:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6912/12318 [11:57:14<9:20:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6913/12318 [11:57:22<9:20:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6913/12318 [11:57:22<9:20:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6914/12318 [11:57:23<9:20:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6914/12318 [11:57:23<9:20:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6915/12318 [11:57:32<9:20:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6915/12318 [11:57:32<9:20:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6916/12318 [11:57:41<9:20:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6916/12318 [11:57:41<9:20:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6917/12318 [11:57:42<9:20:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6917/12318 [11:57:42<9:20:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6918/12318 [11:57:51<9:20:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6918/12318 [11:57:51<9:20:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6919/12318 [11:57:56<9:20:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6919/12318 [11:57:56<9:20:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6920/12318 [11:58:02<9:20:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6920/12318 [11:58:02<9:20:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6921/12318 [11:58:09<9:20:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6921/12318 [11:58:09<9:20:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6922/12318 [11:58:11<9:19:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6922/12318 [11:58:11<9:19:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6923/12318 [11:58:13<9:19:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6923/12318 [11:58:13<9:19:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6924/12318 [11:58:20<9:19:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6924/12318 [11:58:20<9:19:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6925/12318 [11:58:25<9:19:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6925/12318 [11:58:25<9:19:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6926/12318 [11:58:33<9:19:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6926/12318 [11:58:33<9:19:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6927/12318 [11:58:35<9:19:15,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6927/12318 [11:58:35<9:19:15,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6928/12318 [11:58:43<9:19:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6928/12318 [11:58:43<9:19:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6929/12318 [11:58:48<9:19:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6929/12318 [11:58:48<9:19:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6930/12318 [11:58:55<9:18:57,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6930/12318 [11:58:55<9:18:57,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6931/12318 [11:59:03<9:18:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6931/12318 [11:59:03<9:18:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6932/12318 [11:59:07<9:18:44,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6932/12318 [11:59:07<9:18:44,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6933/12318 [11:59:14<9:18:38,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6933/12318 [11:59:14<9:18:38,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6934/12318 [11:59:20<9:18:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6934/12318 [11:59:20<9:18:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6935/12318 [11:59:25<9:18:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6935/12318 [11:59:25<9:18:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6936/12318 [11:59:28<9:18:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6936/12318 [11:59:28<9:18:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6937/12318 [11:59:37<9:18:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6937/12318 [11:59:37<9:18:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6938/12318 [11:59:44<9:18:06,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6938/12318 [11:59:44<9:18:06,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6939/12318 [11:59:49<9:17:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6939/12318 [11:59:49<9:17:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6940/12318 [11:59:50<9:17:49,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6940/12318 [11:59:50<9:17:49,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6941/12318 [11:59:57<9:17:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6941/12318 [11:59:57<9:17:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6942/12318 [12:00:03<9:17:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6942/12318 [12:00:03<9:17:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6943/12318 [12:00:08<9:17:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6943/12318 [12:00:08<9:17:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6944/12318 [12:00:34<9:17:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6944/12318 [12:00:34<9:17:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6945/12318 [12:00:40<9:17:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6945/12318 [12:00:40<9:17:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6946/12318 [12:00:42<9:17:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6946/12318 [12:00:42<9:17:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6947/12318 [12:00:47<9:17:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6947/12318 [12:00:47<9:17:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6948/12318 [12:00:51<9:17:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6948/12318 [12:00:51<9:17:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6949/12318 [12:00:59<9:17:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6949/12318 [12:00:59<9:17:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6950/12318 [12:01:03<9:16:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6950/12318 [12:01:03<9:16:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6951/12318 [12:01:04<9:16:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6951/12318 [12:01:04<9:16:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6952/12318 [12:01:05<9:16:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6952/12318 [12:01:05<9:16:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6953/12318 [12:01:14<9:16:31,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6953/12318 [12:01:14<9:16:31,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6954/12318 [12:01:22<9:16:25,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6954/12318 [12:01:22<9:16:25,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6955/12318 [12:01:26<9:16:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6955/12318 [12:01:26<9:16:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6956/12318 [12:01:30<9:16:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6956/12318 [12:01:30<9:16:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6957/12318 [12:01:35<9:16:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6957/12318 [12:01:35<9:16:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6958/12318 [12:01:41<9:15:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6958/12318 [12:01:41<9:15:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  56%|▌| 6959/12318 [12:01:48<9:15:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  56%|▌| 6959/12318 [12:01:48<9:15:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6960/12318 [12:01:56<9:15:46,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6960/12318 [12:01:56<9:15:46,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6961/12318 [12:01:57<9:15:36,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6961/12318 [12:01:57<9:15:36,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6962/12318 [12:02:03<9:15:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6962/12318 [12:02:03<9:15:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6963/12318 [12:02:08<9:15:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6963/12318 [12:02:08<9:15:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6964/12318 [12:02:16<9:15:17,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6964/12318 [12:02:16<9:15:17,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6965/12318 [12:02:19<9:15:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6965/12318 [12:02:19<9:15:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6966/12318 [12:02:27<9:15:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6966/12318 [12:02:27<9:15:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6967/12318 [12:02:35<9:14:59,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6967/12318 [12:02:35<9:14:59,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6968/12318 [12:02:39<9:14:51,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6968/12318 [12:02:39<9:14:51,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6969/12318 [12:02:46<9:14:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6969/12318 [12:02:46<9:14:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6970/12318 [12:02:49<9:14:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6970/12318 [12:02:49<9:14:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6971/12318 [12:02:54<9:14:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6971/12318 [12:02:54<9:14:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6972/12318 [12:02:58<9:14:22,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6972/12318 [12:02:58<9:14:22,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6973/12318 [12:03:00<9:14:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6973/12318 [12:03:00<9:14:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6974/12318 [12:03:08<9:14:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6974/12318 [12:03:08<9:14:07,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6975/12318 [12:03:14<9:14:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6975/12318 [12:03:14<9:14:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6976/12318 [12:04:01<9:14:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6976/12318 [12:04:01<9:14:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6977/12318 [12:04:06<9:14:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6977/12318 [12:04:06<9:14:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6978/12318 [12:04:13<9:14:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6978/12318 [12:04:13<9:14:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6979/12318 [12:04:18<9:14:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6979/12318 [12:04:18<9:14:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6980/12318 [12:04:20<9:13:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6980/12318 [12:04:20<9:13:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6981/12318 [12:04:21<9:13:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6981/12318 [12:04:21<9:13:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6982/12318 [12:04:30<9:13:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6982/12318 [12:04:30<9:13:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6983/12318 [12:04:36<9:13:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6983/12318 [12:04:36<9:13:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6984/12318 [12:04:43<9:13:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6984/12318 [12:04:43<9:13:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6985/12318 [12:04:46<9:13:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6985/12318 [12:04:46<9:13:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6986/12318 [12:04:54<9:13:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6986/12318 [12:04:54<9:13:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6987/12318 [12:05:01<9:13:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6987/12318 [12:05:01<9:13:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6988/12318 [12:05:08<9:13:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6988/12318 [12:05:08<9:13:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6989/12318 [12:05:16<9:13:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6989/12318 [12:05:16<9:13:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6990/12318 [12:05:20<9:12:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6990/12318 [12:05:20<9:12:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6991/12318 [12:05:26<9:12:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6991/12318 [12:05:26<9:12:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6992/12318 [12:05:30<9:12:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6992/12318 [12:05:30<9:12:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6993/12318 [12:05:38<9:12:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6993/12318 [12:05:38<9:12:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6994/12318 [12:05:43<9:12:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6994/12318 [12:05:43<9:12:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6995/12318 [12:05:51<9:12:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6995/12318 [12:05:51<9:12:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6996/12318 [12:06:00<9:12:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6996/12318 [12:06:00<9:12:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6997/12318 [12:06:08<9:12:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6997/12318 [12:06:08<9:12:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6998/12318 [12:06:13<9:12:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6998/12318 [12:06:13<9:12:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 6999/12318 [12:06:17<9:11:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 6999/12318 [12:06:17<9:11:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7000/12318 [12:06:22<9:11:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7000/12318 [12:06:22<9:11:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7001/12318 [12:06:24<9:11:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7001/12318 [12:06:24<9:11:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7002/12318 [12:06:30<9:11:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7002/12318 [12:06:30<9:11:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7003/12318 [12:06:34<9:11:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7003/12318 [12:06:34<9:11:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7004/12318 [12:06:39<9:11:19,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7004/12318 [12:06:39<9:11:19,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7005/12318 [12:06:44<9:11:12,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7005/12318 [12:06:44<9:11:12,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7006/12318 [12:06:47<9:11:03,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7006/12318 [12:06:47<9:11:03,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7007/12318 [12:06:55<9:10:58,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7007/12318 [12:06:55<9:10:58,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7008/12318 [12:07:12<9:11:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7008/12318 [12:07:12<9:11:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7009/12318 [12:07:15<9:10:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7009/12318 [12:07:15<9:10:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7010/12318 [12:07:21<9:10:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7010/12318 [12:07:21<9:10:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7011/12318 [12:07:22<9:10:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7011/12318 [12:07:22<9:10:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7012/12318 [12:07:26<9:10:27,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7012/12318 [12:07:26<9:10:27,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7013/12318 [12:07:35<9:10:23,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7013/12318 [12:07:35<9:10:23,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7014/12318 [12:07:42<9:10:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7014/12318 [12:07:42<9:10:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7015/12318 [12:07:51<9:10:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7015/12318 [12:07:51<9:10:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7016/12318 [12:07:55<9:10:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7016/12318 [12:07:55<9:10:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7017/12318 [12:08:01<9:09:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7017/12318 [12:08:01<9:09:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7018/12318 [12:08:06<9:09:52,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7018/12318 [12:08:06<9:09:52,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7019/12318 [12:08:13<9:09:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7019/12318 [12:08:13<9:09:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7020/12318 [12:08:18<9:09:39,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7020/12318 [12:08:18<9:09:39,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7021/12318 [12:08:24<9:09:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7021/12318 [12:08:24<9:09:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7022/12318 [12:08:29<9:09:26,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7022/12318 [12:08:29<9:09:26,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7023/12318 [12:08:36<9:09:20,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7023/12318 [12:08:36<9:09:20,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7024/12318 [12:08:37<9:09:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7024/12318 [12:08:37<9:09:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7025/12318 [12:08:42<9:09:02,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7025/12318 [12:08:42<9:09:02,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7026/12318 [12:08:46<9:08:54,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7026/12318 [12:08:46<9:08:54,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7027/12318 [12:08:52<9:08:48,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7027/12318 [12:08:52<9:08:48,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7028/12318 [12:09:00<9:08:43,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7028/12318 [12:09:00<9:08:43,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7029/12318 [12:09:06<9:08:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7029/12318 [12:09:06<9:08:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7030/12318 [12:09:14<9:08:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7030/12318 [12:09:14<9:08:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7031/12318 [12:09:18<9:08:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7031/12318 [12:09:18<9:08:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7032/12318 [12:09:25<9:08:18,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7032/12318 [12:09:25<9:08:18,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7033/12318 [12:09:28<9:08:10,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7033/12318 [12:09:28<9:08:10,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7034/12318 [12:09:31<9:08:01,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7034/12318 [12:09:31<9:08:01,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7035/12318 [12:09:39<9:07:56,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7035/12318 [12:09:39<9:07:56,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7036/12318 [12:09:45<9:07:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7036/12318 [12:09:45<9:07:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7037/12318 [12:09:54<9:07:45,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7037/12318 [12:09:54<9:07:45,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7038/12318 [12:09:57<9:07:37,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7038/12318 [12:09:57<9:07:37,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7039/12318 [12:10:01<9:07:29,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7039/12318 [12:10:01<9:07:29,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7040/12318 [12:10:52<9:07:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7040/12318 [12:10:52<9:07:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7041/12318 [12:10:54<9:07:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7041/12318 [12:10:54<9:07:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7042/12318 [12:10:56<9:07:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7042/12318 [12:10:56<9:07:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7043/12318 [12:11:03<9:07:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7043/12318 [12:11:03<9:07:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7044/12318 [12:11:07<9:07:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7044/12318 [12:11:07<9:07:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7045/12318 [12:11:13<9:07:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7045/12318 [12:11:13<9:07:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7046/12318 [12:11:14<9:07:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7046/12318 [12:11:14<9:07:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7047/12318 [12:11:18<9:07:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7047/12318 [12:11:18<9:07:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7048/12318 [12:11:22<9:06:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7048/12318 [12:11:22<9:06:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7049/12318 [12:11:26<9:06:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7049/12318 [12:11:26<9:06:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7050/12318 [12:11:34<9:06:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7050/12318 [12:11:34<9:06:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7051/12318 [12:11:35<9:06:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7051/12318 [12:11:35<9:06:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7052/12318 [12:11:42<9:06:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7052/12318 [12:11:42<9:06:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7053/12318 [12:11:45<9:06:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7053/12318 [12:11:45<9:06:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7054/12318 [12:11:50<9:06:08,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7054/12318 [12:11:50<9:06:08,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7055/12318 [12:11:57<9:06:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7055/12318 [12:11:57<9:06:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7056/12318 [12:12:02<9:05:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7056/12318 [12:12:02<9:05:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7057/12318 [12:12:10<9:05:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7057/12318 [12:12:10<9:05:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7058/12318 [12:12:14<9:05:42,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7058/12318 [12:12:14<9:05:42,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7059/12318 [12:12:20<9:05:35,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7059/12318 [12:12:20<9:05:35,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7060/12318 [12:12:28<9:05:30,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7060/12318 [12:12:28<9:05:30,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7061/12318 [12:12:34<9:05:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7061/12318 [12:12:34<9:05:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7062/12318 [12:12:42<9:05:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7062/12318 [12:12:42<9:05:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7063/12318 [12:12:49<9:05:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7063/12318 [12:12:49<9:05:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7064/12318 [12:12:57<9:05:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7064/12318 [12:12:57<9:05:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7065/12318 [12:13:03<9:05:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7065/12318 [12:13:03<9:05:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7066/12318 [12:13:06<9:04:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7066/12318 [12:13:06<9:04:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7067/12318 [12:13:12<9:04:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7067/12318 [12:13:12<9:04:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7068/12318 [12:13:20<9:04:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7068/12318 [12:13:20<9:04:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7069/12318 [12:13:28<9:04:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7069/12318 [12:13:28<9:04:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7070/12318 [12:13:34<9:04:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7070/12318 [12:13:34<9:04:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7071/12318 [12:13:40<9:04:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7071/12318 [12:13:40<9:04:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7072/12318 [12:14:10<9:04:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7072/12318 [12:14:10<9:04:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7073/12318 [12:14:17<9:04:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7073/12318 [12:14:17<9:04:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7074/12318 [12:14:20<9:04:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7074/12318 [12:14:20<9:04:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7075/12318 [12:14:25<9:04:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7075/12318 [12:14:25<9:04:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7076/12318 [12:14:29<9:04:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7076/12318 [12:14:29<9:04:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7077/12318 [12:14:34<9:03:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7077/12318 [12:14:34<9:03:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7078/12318 [12:14:40<9:03:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7078/12318 [12:14:40<9:03:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7079/12318 [12:14:49<9:03:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7079/12318 [12:14:49<9:03:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7080/12318 [12:14:54<9:03:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7080/12318 [12:14:54<9:03:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7081/12318 [12:14:58<9:03:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7081/12318 [12:14:58<9:03:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  57%|▌| 7082/12318 [12:15:03<9:03:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  57%|▌| 7082/12318 [12:15:03<9:03:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7083/12318 [12:15:11<9:03:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7083/12318 [12:15:11<9:03:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7084/12318 [12:15:20<9:03:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7084/12318 [12:15:20<9:03:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7085/12318 [12:15:22<9:03:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7085/12318 [12:15:22<9:03:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7086/12318 [12:15:30<9:03:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7086/12318 [12:15:30<9:03:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7087/12318 [12:15:33<9:02:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7087/12318 [12:15:33<9:02:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7088/12318 [12:15:39<9:02:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7088/12318 [12:15:39<9:02:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7089/12318 [12:15:48<9:02:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7089/12318 [12:15:48<9:02:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7090/12318 [12:15:53<9:02:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7090/12318 [12:15:53<9:02:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7091/12318 [12:15:58<9:02:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7091/12318 [12:15:58<9:02:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7092/12318 [12:16:02<9:02:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7092/12318 [12:16:02<9:02:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7093/12318 [12:16:05<9:02:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7093/12318 [12:16:05<9:02:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7094/12318 [12:16:12<9:02:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7094/12318 [12:16:12<9:02:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7095/12318 [12:16:15<9:01:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7095/12318 [12:16:15<9:01:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7096/12318 [12:16:16<9:01:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7096/12318 [12:16:16<9:01:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7097/12318 [12:16:18<9:01:40,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7097/12318 [12:16:18<9:01:40,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7098/12318 [12:16:22<9:01:32,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7098/12318 [12:16:22<9:01:32,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7099/12318 [12:16:26<9:01:24,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7099/12318 [12:16:26<9:01:24,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7100/12318 [12:16:30<9:01:16,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7100/12318 [12:16:30<9:01:16,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7101/12318 [12:16:37<9:01:11,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7101/12318 [12:16:37<9:01:11,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7102/12318 [12:16:43<9:01:04,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7102/12318 [12:16:43<9:01:04,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7103/12318 [12:16:45<9:00:55,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7103/12318 [12:16:45<9:00:55,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7104/12318 [12:17:36<9:01:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7104/12318 [12:17:36<9:01:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7105/12318 [12:17:38<9:01:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7105/12318 [12:17:38<9:01:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7106/12318 [12:17:46<9:01:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7106/12318 [12:17:46<9:01:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7107/12318 [12:17:49<9:00:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7107/12318 [12:17:49<9:00:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7108/12318 [12:17:57<9:00:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7108/12318 [12:17:57<9:00:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7109/12318 [12:18:05<9:00:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7109/12318 [12:18:05<9:00:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7110/12318 [12:18:11<9:00:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7110/12318 [12:18:11<9:00:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7111/12318 [12:18:17<9:00:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7111/12318 [12:18:17<9:00:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7112/12318 [12:18:26<9:00:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7112/12318 [12:18:26<9:00:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7113/12318 [12:18:34<9:00:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7113/12318 [12:18:34<9:00:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7114/12318 [12:18:39<9:00:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7114/12318 [12:18:39<9:00:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7115/12318 [12:18:45<9:00:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7115/12318 [12:18:45<9:00:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7116/12318 [12:18:54<9:00:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7116/12318 [12:18:54<9:00:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7117/12318 [12:19:02<9:00:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7117/12318 [12:19:02<9:00:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7118/12318 [12:19:04<8:59:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7118/12318 [12:19:04<8:59:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7119/12318 [12:19:07<8:59:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7119/12318 [12:19:07<8:59:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7120/12318 [12:19:09<8:59:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7120/12318 [12:19:09<8:59:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7121/12318 [12:19:13<8:59:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7121/12318 [12:19:13<8:59:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7122/12318 [12:19:18<8:59:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7122/12318 [12:19:18<8:59:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7123/12318 [12:19:26<8:59:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7123/12318 [12:19:26<8:59:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7124/12318 [12:19:32<8:59:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7124/12318 [12:19:32<8:59:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7125/12318 [12:19:34<8:59:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7125/12318 [12:19:34<8:59:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7126/12318 [12:19:36<8:58:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7126/12318 [12:19:36<8:58:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7127/12318 [12:19:37<8:58:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7127/12318 [12:19:37<8:58:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7128/12318 [12:19:40<8:58:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7128/12318 [12:19:40<8:58:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7129/12318 [12:19:44<8:58:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7129/12318 [12:19:44<8:58:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7130/12318 [12:19:48<8:58:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7130/12318 [12:19:48<8:58:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7131/12318 [12:19:57<8:58:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7131/12318 [12:19:57<8:58:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7132/12318 [12:20:02<8:58:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7132/12318 [12:20:02<8:58:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7133/12318 [12:20:07<8:57:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7133/12318 [12:20:07<8:57:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7134/12318 [12:20:09<8:57:50,  6.22s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7134/12318 [12:20:09<8:57:50,  6.22s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7135/12318 [12:20:17<8:57:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7135/12318 [12:20:17<8:57:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7136/12318 [12:20:47<8:57:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7136/12318 [12:20:47<8:57:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7137/12318 [12:20:56<8:57:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7137/12318 [12:20:56<8:57:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7138/12318 [12:21:04<8:57:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7138/12318 [12:21:04<8:57:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7139/12318 [12:21:08<8:57:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7139/12318 [12:21:08<8:57:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7140/12318 [12:21:16<8:57:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7140/12318 [12:21:16<8:57:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7141/12318 [12:21:24<8:57:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7141/12318 [12:21:24<8:57:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7142/12318 [12:21:29<8:57:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7142/12318 [12:21:29<8:57:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7143/12318 [12:21:36<8:57:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7143/12318 [12:21:36<8:57:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7144/12318 [12:21:37<8:57:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7144/12318 [12:21:37<8:57:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7145/12318 [12:21:44<8:57:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7145/12318 [12:21:44<8:57:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7146/12318 [12:21:52<8:56:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7146/12318 [12:21:52<8:56:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7147/12318 [12:21:54<8:56:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7147/12318 [12:21:54<8:56:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7148/12318 [12:21:57<8:56:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7148/12318 [12:21:57<8:56:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7149/12318 [12:22:03<8:56:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7149/12318 [12:22:03<8:56:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7150/12318 [12:22:08<8:56:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7150/12318 [12:22:08<8:56:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7151/12318 [12:22:11<8:56:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7151/12318 [12:22:11<8:56:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7152/12318 [12:22:20<8:56:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7152/12318 [12:22:20<8:56:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7153/12318 [12:22:23<8:56:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7153/12318 [12:22:23<8:56:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7154/12318 [12:22:30<8:55:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7154/12318 [12:22:30<8:55:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7155/12318 [12:22:35<8:55:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7155/12318 [12:22:35<8:55:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7156/12318 [12:22:40<8:55:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7156/12318 [12:22:40<8:55:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7157/12318 [12:22:47<8:55:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7157/12318 [12:22:47<8:55:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7158/12318 [12:22:48<8:55:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7158/12318 [12:22:48<8:55:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7159/12318 [12:22:54<8:55:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7159/12318 [12:22:54<8:55:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7160/12318 [12:23:02<8:55:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7160/12318 [12:23:02<8:55:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7161/12318 [12:23:09<8:55:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7161/12318 [12:23:09<8:55:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7162/12318 [12:23:16<8:55:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7162/12318 [12:23:16<8:55:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7163/12318 [12:23:19<8:54:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7163/12318 [12:23:19<8:54:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7164/12318 [12:23:22<8:54:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7164/12318 [12:23:22<8:54:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7165/12318 [12:23:29<8:54:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7165/12318 [12:23:29<8:54:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7166/12318 [12:23:33<8:54:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7166/12318 [12:23:33<8:54:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7167/12318 [12:23:36<8:54:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7167/12318 [12:23:36<8:54:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7168/12318 [12:24:17<8:54:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7168/12318 [12:24:17<8:54:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7169/12318 [12:24:24<8:54:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7169/12318 [12:24:24<8:54:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7170/12318 [12:24:27<8:54:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7170/12318 [12:24:27<8:54:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7171/12318 [12:24:36<8:54:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7171/12318 [12:24:36<8:54:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7172/12318 [12:24:43<8:54:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7172/12318 [12:24:43<8:54:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7173/12318 [12:24:50<8:54:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7173/12318 [12:24:50<8:54:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7174/12318 [12:24:55<8:54:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7174/12318 [12:24:55<8:54:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7175/12318 [12:25:02<8:54:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7175/12318 [12:25:02<8:54:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7176/12318 [12:25:05<8:53:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7176/12318 [12:25:05<8:53:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7177/12318 [12:25:06<8:53:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7177/12318 [12:25:06<8:53:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7178/12318 [12:25:14<8:53:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7178/12318 [12:25:14<8:53:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7179/12318 [12:25:21<8:53:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7179/12318 [12:25:21<8:53:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7180/12318 [12:25:26<8:53:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7180/12318 [12:25:26<8:53:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7181/12318 [12:25:28<8:53:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7181/12318 [12:25:28<8:53:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7182/12318 [12:25:34<8:53:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7182/12318 [12:25:34<8:53:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7183/12318 [12:25:36<8:53:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7183/12318 [12:25:36<8:53:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7184/12318 [12:25:45<8:52:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7184/12318 [12:25:45<8:52:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7185/12318 [12:25:51<8:52:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7185/12318 [12:25:51<8:52:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7186/12318 [12:26:00<8:52:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7186/12318 [12:26:00<8:52:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7187/12318 [12:26:05<8:52:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7187/12318 [12:26:05<8:52:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7188/12318 [12:26:14<8:52:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7188/12318 [12:26:14<8:52:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7189/12318 [12:26:16<8:52:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7189/12318 [12:26:16<8:52:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7190/12318 [12:26:19<8:52:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7190/12318 [12:26:19<8:52:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7191/12318 [12:26:28<8:52:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7191/12318 [12:26:28<8:52:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7192/12318 [12:26:33<8:52:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7192/12318 [12:26:33<8:52:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7193/12318 [12:26:41<8:52:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7193/12318 [12:26:41<8:52:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7194/12318 [12:26:49<8:51:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7194/12318 [12:26:49<8:51:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7195/12318 [12:26:55<8:51:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7195/12318 [12:26:55<8:51:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7196/12318 [12:27:01<8:51:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7196/12318 [12:27:01<8:51:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7197/12318 [12:27:03<8:51:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7197/12318 [12:27:03<8:51:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7198/12318 [12:27:05<8:51:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7198/12318 [12:27:05<8:51:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7199/12318 [12:27:07<8:51:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7199/12318 [12:27:07<8:51:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7200/12318 [12:27:34<8:51:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7200/12318 [12:27:34<8:51:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7201/12318 [12:28:01<8:51:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7201/12318 [12:28:01<8:51:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7202/12318 [12:28:07<8:51:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7202/12318 [12:28:07<8:51:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7203/12318 [12:28:16<8:51:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7203/12318 [12:28:16<8:51:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7204/12318 [12:28:21<8:51:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7204/12318 [12:28:21<8:51:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7205/12318 [12:28:27<8:51:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7205/12318 [12:28:27<8:51:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  58%|▌| 7206/12318 [12:28:32<8:51:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  58%|▌| 7206/12318 [12:28:32<8:51:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7207/12318 [12:28:39<8:50:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7207/12318 [12:28:39<8:50:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7208/12318 [12:28:46<8:50:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7208/12318 [12:28:46<8:50:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7209/12318 [12:28:51<8:50:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7209/12318 [12:28:51<8:50:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7210/12318 [12:29:00<8:50:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7210/12318 [12:29:00<8:50:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7211/12318 [12:29:04<8:50:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7211/12318 [12:29:04<8:50:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7212/12318 [12:29:13<8:50:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7212/12318 [12:29:13<8:50:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7213/12318 [12:29:18<8:50:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7213/12318 [12:29:18<8:50:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7214/12318 [12:29:22<8:50:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7214/12318 [12:29:22<8:50:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7215/12318 [12:29:26<8:50:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7215/12318 [12:29:26<8:50:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7216/12318 [12:29:30<8:49:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7216/12318 [12:29:30<8:49:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7217/12318 [12:29:35<8:49:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7217/12318 [12:29:35<8:49:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7218/12318 [12:29:41<8:49:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7218/12318 [12:29:41<8:49:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7219/12318 [12:29:50<8:49:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7219/12318 [12:29:50<8:49:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7220/12318 [12:29:53<8:49:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7220/12318 [12:29:53<8:49:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7221/12318 [12:29:55<8:49:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7221/12318 [12:29:55<8:49:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7222/12318 [12:30:01<8:49:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7222/12318 [12:30:01<8:49:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7223/12318 [12:30:09<8:49:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7223/12318 [12:30:09<8:49:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7224/12318 [12:30:14<8:49:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7224/12318 [12:30:14<8:49:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7225/12318 [12:30:17<8:48:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7225/12318 [12:30:17<8:48:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7226/12318 [12:30:26<8:48:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7226/12318 [12:30:26<8:48:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7227/12318 [12:30:33<8:48:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7227/12318 [12:30:33<8:48:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7228/12318 [12:30:39<8:48:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7228/12318 [12:30:39<8:48:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7229/12318 [12:30:47<8:48:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7229/12318 [12:30:47<8:48:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7230/12318 [12:30:54<8:48:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7230/12318 [12:30:54<8:48:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7231/12318 [12:31:03<8:48:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7231/12318 [12:31:03<8:48:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7232/12318 [12:31:19<8:48:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7232/12318 [12:31:19<8:48:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7233/12318 [12:31:24<8:48:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7233/12318 [12:31:24<8:48:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7234/12318 [12:31:31<8:48:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7234/12318 [12:31:31<8:48:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7235/12318 [12:31:32<8:48:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7235/12318 [12:31:32<8:48:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7236/12318 [12:31:37<8:47:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7236/12318 [12:31:37<8:47:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7237/12318 [12:31:43<8:47:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7237/12318 [12:31:43<8:47:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7238/12318 [12:31:49<8:47:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7238/12318 [12:31:49<8:47:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7239/12318 [12:31:58<8:47:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7239/12318 [12:31:58<8:47:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7240/12318 [12:32:04<8:47:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7240/12318 [12:32:04<8:47:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7241/12318 [12:32:06<8:47:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7241/12318 [12:32:06<8:47:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7242/12318 [12:32:08<8:47:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7242/12318 [12:32:08<8:47:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7243/12318 [12:32:17<8:47:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7243/12318 [12:32:17<8:47:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7244/12318 [12:32:20<8:46:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7244/12318 [12:32:20<8:46:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7245/12318 [12:32:28<8:46:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7245/12318 [12:32:28<8:46:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7246/12318 [12:32:34<8:46:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7246/12318 [12:32:34<8:46:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7247/12318 [12:32:42<8:46:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7247/12318 [12:32:42<8:46:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7248/12318 [12:32:48<8:46:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7248/12318 [12:32:48<8:46:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7249/12318 [12:32:53<8:46:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7249/12318 [12:32:53<8:46:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7250/12318 [12:33:00<8:46:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7250/12318 [12:33:00<8:46:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7251/12318 [12:33:03<8:46:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7251/12318 [12:33:03<8:46:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7252/12318 [12:33:10<8:46:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7252/12318 [12:33:10<8:46:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7253/12318 [12:33:18<8:46:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7253/12318 [12:33:18<8:46:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7254/12318 [12:33:26<8:45:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7254/12318 [12:33:26<8:45:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7255/12318 [12:33:28<8:45:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7255/12318 [12:33:28<8:45:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7256/12318 [12:33:36<8:45:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7256/12318 [12:33:36<8:45:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7257/12318 [12:33:40<8:45:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7257/12318 [12:33:40<8:45:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7258/12318 [12:33:49<8:45:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7258/12318 [12:33:49<8:45:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7259/12318 [12:33:54<8:45:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7259/12318 [12:33:54<8:45:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7260/12318 [12:34:02<8:45:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7260/12318 [12:34:02<8:45:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7261/12318 [12:34:09<8:45:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7261/12318 [12:34:09<8:45:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7262/12318 [12:34:14<8:45:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7262/12318 [12:34:14<8:45:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7263/12318 [12:34:22<8:45:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7263/12318 [12:34:22<8:45:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7264/12318 [12:34:38<8:45:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7264/12318 [12:34:38<8:45:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7265/12318 [12:34:43<8:44:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7265/12318 [12:34:43<8:44:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7266/12318 [12:34:49<8:44:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7266/12318 [12:34:49<8:44:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7267/12318 [12:34:53<8:44:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7267/12318 [12:34:53<8:44:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7268/12318 [12:34:56<8:44:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7268/12318 [12:34:56<8:44:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7269/12318 [12:34:59<8:44:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7269/12318 [12:34:59<8:44:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7270/12318 [12:35:05<8:44:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7270/12318 [12:35:05<8:44:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7271/12318 [12:35:09<8:44:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7271/12318 [12:35:09<8:44:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7272/12318 [12:35:16<8:44:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7272/12318 [12:35:16<8:44:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7273/12318 [12:35:25<8:44:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7273/12318 [12:35:25<8:44:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7274/12318 [12:35:33<8:43:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7274/12318 [12:35:33<8:43:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7275/12318 [12:35:36<8:43:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7275/12318 [12:35:36<8:43:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7276/12318 [12:35:38<8:43:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7276/12318 [12:35:38<8:43:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7277/12318 [12:35:40<8:43:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7277/12318 [12:35:40<8:43:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7278/12318 [12:35:47<8:43:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7278/12318 [12:35:47<8:43:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7279/12318 [12:35:52<8:43:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7279/12318 [12:35:52<8:43:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7280/12318 [12:35:57<8:43:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7280/12318 [12:35:57<8:43:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7281/12318 [12:36:03<8:43:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7281/12318 [12:36:03<8:43:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7282/12318 [12:36:11<8:42:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7282/12318 [12:36:11<8:42:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7283/12318 [12:36:20<8:42:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7283/12318 [12:36:20<8:42:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7284/12318 [12:36:22<8:42:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7284/12318 [12:36:22<8:42:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7285/12318 [12:36:25<8:42:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7285/12318 [12:36:25<8:42:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7286/12318 [12:36:33<8:42:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7286/12318 [12:36:33<8:42:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7287/12318 [12:36:38<8:42:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7287/12318 [12:36:38<8:42:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7288/12318 [12:36:46<8:42:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7288/12318 [12:36:46<8:42:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7289/12318 [12:36:53<8:42:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7289/12318 [12:36:53<8:42:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7290/12318 [12:37:02<8:42:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7290/12318 [12:37:02<8:42:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7291/12318 [12:37:06<8:42:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7291/12318 [12:37:06<8:42:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7292/12318 [12:37:14<8:41:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7292/12318 [12:37:14<8:41:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7293/12318 [12:37:23<8:41:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7293/12318 [12:37:23<8:41:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7294/12318 [12:37:30<8:41:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7294/12318 [12:37:30<8:41:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7295/12318 [12:37:34<8:41:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7295/12318 [12:37:34<8:41:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7296/12318 [12:38:24<8:42:01,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7296/12318 [12:38:24<8:42:01,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7297/12318 [12:38:33<8:41:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7297/12318 [12:38:33<8:41:57,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7298/12318 [12:38:36<8:41:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7298/12318 [12:38:36<8:41:48,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7299/12318 [12:38:44<8:41:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7299/12318 [12:38:44<8:41:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7300/12318 [12:38:48<8:41:35,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7300/12318 [12:38:48<8:41:35,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7301/12318 [12:38:54<8:41:29,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7301/12318 [12:38:54<8:41:29,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7302/12318 [12:39:02<8:41:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7302/12318 [12:39:02<8:41:24,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7303/12318 [12:39:06<8:41:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7303/12318 [12:39:06<8:41:17,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7304/12318 [12:39:14<8:41:12,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7304/12318 [12:39:14<8:41:12,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7305/12318 [12:39:23<8:41:07,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7305/12318 [12:39:23<8:41:07,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7306/12318 [12:39:30<8:41:02,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7306/12318 [12:39:30<8:41:02,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7307/12318 [12:39:36<8:40:55,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7307/12318 [12:39:36<8:40:55,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7308/12318 [12:39:42<8:40:49,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7308/12318 [12:39:42<8:40:49,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7309/12318 [12:39:46<8:40:41,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7309/12318 [12:39:46<8:40:41,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7310/12318 [12:39:49<8:40:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7310/12318 [12:39:49<8:40:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7311/12318 [12:39:53<8:40:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7311/12318 [12:39:53<8:40:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7312/12318 [12:40:02<8:40:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7312/12318 [12:40:02<8:40:20,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7313/12318 [12:40:04<8:40:11,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7313/12318 [12:40:04<8:40:11,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7314/12318 [12:40:05<8:40:01,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7314/12318 [12:40:05<8:40:01,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7315/12318 [12:40:13<8:39:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7315/12318 [12:40:13<8:39:56,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7316/12318 [12:40:21<8:39:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7316/12318 [12:40:21<8:39:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7317/12318 [12:40:26<8:39:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7317/12318 [12:40:26<8:39:44,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7318/12318 [12:40:33<8:39:39,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7318/12318 [12:40:33<8:39:39,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7319/12318 [12:40:35<8:39:29,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7319/12318 [12:40:35<8:39:29,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7320/12318 [12:40:41<8:39:23,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7320/12318 [12:40:41<8:39:23,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7321/12318 [12:40:47<8:39:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7321/12318 [12:40:47<8:39:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7322/12318 [12:40:49<8:39:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7322/12318 [12:40:49<8:39:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7323/12318 [12:40:58<8:39:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7323/12318 [12:40:58<8:39:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7324/12318 [12:41:02<8:38:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7324/12318 [12:41:02<8:38:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7325/12318 [12:41:08<8:38:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7325/12318 [12:41:08<8:38:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7326/12318 [12:41:11<8:38:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7326/12318 [12:41:11<8:38:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7327/12318 [12:41:12<8:38:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7327/12318 [12:41:12<8:38:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7328/12318 [12:41:30<8:38:33,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7328/12318 [12:41:30<8:38:33,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  59%|▌| 7329/12318 [12:41:35<8:38:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  59%|▌| 7329/12318 [12:41:35<8:38:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7330/12318 [12:41:40<8:38:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7330/12318 [12:41:40<8:38:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7331/12318 [12:41:42<8:38:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7331/12318 [12:41:42<8:38:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7332/12318 [12:41:46<8:38:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7332/12318 [12:41:46<8:38:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7333/12318 [12:41:54<8:37:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7333/12318 [12:41:54<8:37:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7334/12318 [12:42:00<8:37:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7334/12318 [12:42:00<8:37:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7335/12318 [12:42:04<8:37:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7335/12318 [12:42:04<8:37:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7336/12318 [12:42:09<8:37:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7336/12318 [12:42:09<8:37:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7337/12318 [12:42:11<8:37:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7337/12318 [12:42:11<8:37:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7338/12318 [12:42:18<8:37:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7338/12318 [12:42:18<8:37:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7339/12318 [12:42:27<8:37:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7339/12318 [12:42:27<8:37:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7340/12318 [12:42:29<8:37:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7340/12318 [12:42:29<8:37:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7341/12318 [12:42:33<8:36:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7341/12318 [12:42:33<8:36:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7342/12318 [12:42:40<8:36:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7342/12318 [12:42:40<8:36:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7343/12318 [12:42:46<8:36:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7343/12318 [12:42:46<8:36:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7344/12318 [12:42:54<8:36:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7344/12318 [12:42:54<8:36:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7345/12318 [12:42:57<8:36:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7345/12318 [12:42:57<8:36:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7346/12318 [12:43:03<8:36:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7346/12318 [12:43:03<8:36:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7347/12318 [12:43:06<8:36:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7347/12318 [12:43:06<8:36:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7348/12318 [12:43:11<8:36:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7348/12318 [12:43:11<8:36:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7349/12318 [12:43:14<8:36:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7349/12318 [12:43:14<8:36:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7350/12318 [12:43:23<8:35:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7350/12318 [12:43:23<8:35:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7351/12318 [12:43:28<8:35:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7351/12318 [12:43:28<8:35:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7352/12318 [12:43:33<8:35:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7352/12318 [12:43:33<8:35:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7353/12318 [12:43:40<8:35:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7353/12318 [12:43:40<8:35:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7354/12318 [12:43:43<8:35:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7354/12318 [12:43:43<8:35:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7355/12318 [12:43:45<8:35:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7355/12318 [12:43:45<8:35:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7356/12318 [12:43:49<8:35:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7356/12318 [12:43:49<8:35:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7357/12318 [12:43:51<8:35:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7357/12318 [12:43:51<8:35:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7358/12318 [12:43:58<8:34:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7358/12318 [12:43:58<8:34:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7359/12318 [12:44:03<8:34:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7359/12318 [12:44:03<8:34:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7360/12318 [12:44:48<8:35:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7360/12318 [12:44:48<8:35:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7361/12318 [12:44:55<8:35:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7361/12318 [12:44:55<8:35:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7362/12318 [12:44:59<8:34:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7362/12318 [12:44:59<8:34:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7363/12318 [12:45:04<8:34:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7363/12318 [12:45:04<8:34:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7364/12318 [12:45:06<8:34:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7364/12318 [12:45:06<8:34:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7365/12318 [12:45:15<8:34:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7365/12318 [12:45:15<8:34:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7366/12318 [12:45:21<8:34:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7366/12318 [12:45:21<8:34:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7367/12318 [12:45:25<8:34:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7367/12318 [12:45:25<8:34:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7368/12318 [12:45:34<8:34:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7368/12318 [12:45:34<8:34:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7369/12318 [12:45:38<8:34:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7369/12318 [12:45:38<8:34:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7370/12318 [12:45:43<8:34:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7370/12318 [12:45:43<8:34:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7371/12318 [12:45:48<8:33:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7371/12318 [12:45:48<8:33:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7372/12318 [12:45:51<8:33:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7372/12318 [12:45:51<8:33:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7373/12318 [12:45:59<8:33:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7373/12318 [12:45:59<8:33:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7374/12318 [12:46:08<8:33:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7374/12318 [12:46:08<8:33:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7375/12318 [12:46:11<8:33:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7375/12318 [12:46:11<8:33:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7376/12318 [12:46:20<8:33:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7376/12318 [12:46:20<8:33:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7377/12318 [12:46:26<8:33:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7377/12318 [12:46:26<8:33:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7378/12318 [12:46:27<8:33:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7378/12318 [12:46:27<8:33:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7379/12318 [12:46:35<8:33:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7379/12318 [12:46:35<8:33:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7380/12318 [12:46:37<8:32:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7380/12318 [12:46:37<8:32:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7381/12318 [12:46:45<8:32:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7381/12318 [12:46:45<8:32:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7382/12318 [12:46:49<8:32:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7382/12318 [12:46:49<8:32:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7383/12318 [12:46:54<8:32:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7383/12318 [12:46:54<8:32:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7384/12318 [12:47:03<8:32:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7384/12318 [12:47:03<8:32:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7385/12318 [12:47:09<8:32:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7385/12318 [12:47:09<8:32:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7386/12318 [12:47:14<8:32:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7386/12318 [12:47:14<8:32:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7387/12318 [12:47:22<8:32:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7387/12318 [12:47:22<8:32:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7388/12318 [12:47:31<8:32:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7388/12318 [12:47:31<8:32:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7389/12318 [12:47:39<8:32:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7389/12318 [12:47:39<8:32:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7390/12318 [12:47:45<8:31:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7390/12318 [12:47:45<8:31:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7391/12318 [12:47:50<8:31:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7391/12318 [12:47:50<8:31:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7392/12318 [12:48:08<8:31:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7392/12318 [12:48:08<8:31:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7393/12318 [12:48:14<8:31:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7393/12318 [12:48:14<8:31:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7394/12318 [12:48:17<8:31:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7394/12318 [12:48:17<8:31:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7395/12318 [12:48:26<8:31:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7395/12318 [12:48:26<8:31:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7396/12318 [12:48:28<8:31:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7396/12318 [12:48:28<8:31:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7397/12318 [12:48:32<8:31:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7397/12318 [12:48:32<8:31:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7398/12318 [12:48:38<8:31:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7398/12318 [12:48:38<8:31:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7399/12318 [12:48:43<8:31:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7399/12318 [12:48:43<8:31:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7400/12318 [12:48:46<8:30:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7400/12318 [12:48:46<8:30:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7401/12318 [12:48:53<8:30:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7401/12318 [12:48:53<8:30:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7402/12318 [12:48:58<8:30:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7402/12318 [12:48:58<8:30:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7403/12318 [12:49:05<8:30:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7403/12318 [12:49:05<8:30:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7404/12318 [12:49:09<8:30:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7404/12318 [12:49:09<8:30:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7405/12318 [12:49:15<8:30:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7405/12318 [12:49:15<8:30:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7406/12318 [12:49:18<8:30:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7406/12318 [12:49:18<8:30:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7407/12318 [12:49:24<8:30:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7407/12318 [12:49:24<8:30:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7408/12318 [12:49:29<8:30:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7408/12318 [12:49:29<8:30:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7409/12318 [12:49:35<8:29:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7409/12318 [12:49:35<8:29:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7410/12318 [12:49:37<8:29:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7410/12318 [12:49:37<8:29:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7411/12318 [12:49:44<8:29:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7411/12318 [12:49:44<8:29:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7412/12318 [12:49:49<8:29:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7412/12318 [12:49:49<8:29:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7413/12318 [12:49:54<8:29:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7413/12318 [12:49:54<8:29:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7414/12318 [12:49:56<8:29:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7414/12318 [12:49:56<8:29:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7415/12318 [12:50:02<8:29:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7415/12318 [12:50:02<8:29:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7416/12318 [12:50:07<8:29:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7416/12318 [12:50:07<8:29:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7417/12318 [12:50:13<8:28:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7417/12318 [12:50:13<8:28:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7418/12318 [12:50:19<8:28:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7418/12318 [12:50:19<8:28:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7419/12318 [12:50:23<8:28:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7419/12318 [12:50:23<8:28:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7420/12318 [12:50:32<8:28:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7420/12318 [12:50:32<8:28:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7421/12318 [12:50:40<8:28:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7421/12318 [12:50:40<8:28:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7422/12318 [12:50:43<8:28:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7422/12318 [12:50:43<8:28:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7423/12318 [12:50:48<8:28:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7423/12318 [12:50:48<8:28:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7424/12318 [12:51:20<8:28:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7424/12318 [12:51:20<8:28:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7425/12318 [12:51:26<8:28:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7425/12318 [12:51:26<8:28:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7426/12318 [12:51:35<8:28:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7426/12318 [12:51:35<8:28:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7427/12318 [12:51:40<8:28:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7427/12318 [12:51:40<8:28:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7428/12318 [12:51:42<8:28:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7428/12318 [12:51:42<8:28:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7429/12318 [12:51:46<8:27:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7429/12318 [12:51:46<8:27:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7430/12318 [12:51:54<8:27:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7430/12318 [12:51:54<8:27:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7431/12318 [12:52:00<8:27:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7431/12318 [12:52:00<8:27:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7432/12318 [12:52:08<8:27:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7432/12318 [12:52:08<8:27:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7433/12318 [12:52:17<8:27:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7433/12318 [12:52:17<8:27:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7434/12318 [12:52:21<8:27:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7434/12318 [12:52:21<8:27:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7435/12318 [12:52:23<8:27:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7435/12318 [12:52:23<8:27:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7436/12318 [12:52:27<8:27:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7436/12318 [12:52:27<8:27:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7437/12318 [12:52:35<8:27:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7437/12318 [12:52:35<8:27:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7438/12318 [12:52:38<8:26:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7438/12318 [12:52:38<8:26:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7439/12318 [12:52:39<8:26:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7439/12318 [12:52:39<8:26:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7440/12318 [12:52:45<8:26:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7440/12318 [12:52:45<8:26:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7441/12318 [12:52:50<8:26:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7441/12318 [12:52:50<8:26:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7442/12318 [12:52:56<8:26:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7442/12318 [12:52:56<8:26:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7443/12318 [12:52:59<8:26:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7443/12318 [12:52:59<8:26:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7444/12318 [12:53:03<8:26:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7444/12318 [12:53:03<8:26:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7445/12318 [12:53:09<8:26:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7445/12318 [12:53:09<8:26:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7446/12318 [12:53:16<8:25:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7446/12318 [12:53:16<8:25:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7447/12318 [12:53:22<8:25:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7447/12318 [12:53:22<8:25:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7448/12318 [12:53:30<8:25:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7448/12318 [12:53:30<8:25:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7449/12318 [12:53:33<8:25:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7449/12318 [12:53:33<8:25:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7450/12318 [12:53:38<8:25:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7450/12318 [12:53:38<8:25:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7451/12318 [12:53:42<8:25:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7451/12318 [12:53:42<8:25:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  60%|▌| 7452/12318 [12:53:43<8:25:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  60%|▌| 7452/12318 [12:53:43<8:25:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7453/12318 [12:53:48<8:25:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7453/12318 [12:53:48<8:25:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7454/12318 [12:53:54<8:24:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7454/12318 [12:53:54<8:24:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7455/12318 [12:53:59<8:24:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7455/12318 [12:53:59<8:24:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7456/12318 [12:54:28<8:25:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7456/12318 [12:54:28<8:25:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7457/12318 [12:54:37<8:24:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7457/12318 [12:54:37<8:24:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7458/12318 [12:54:42<8:24:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7458/12318 [12:54:42<8:24:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7459/12318 [12:54:48<8:24:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7459/12318 [12:54:48<8:24:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7460/12318 [12:54:54<8:24:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7460/12318 [12:54:54<8:24:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7461/12318 [12:54:56<8:24:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7461/12318 [12:54:56<8:24:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7462/12318 [12:55:01<8:24:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7462/12318 [12:55:01<8:24:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7463/12318 [12:55:08<8:24:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7463/12318 [12:55:08<8:24:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7464/12318 [12:55:16<8:24:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7464/12318 [12:55:16<8:24:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7465/12318 [12:55:21<8:24:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7465/12318 [12:55:21<8:24:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7466/12318 [12:55:28<8:23:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7466/12318 [12:55:28<8:23:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7467/12318 [12:55:35<8:23:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7467/12318 [12:55:35<8:23:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7468/12318 [12:55:42<8:23:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7468/12318 [12:55:42<8:23:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7469/12318 [12:55:49<8:23:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7469/12318 [12:55:49<8:23:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7470/12318 [12:55:56<8:23:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7470/12318 [12:55:56<8:23:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7471/12318 [12:56:04<8:23:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7471/12318 [12:56:04<8:23:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7472/12318 [12:56:10<8:23:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7472/12318 [12:56:10<8:23:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7473/12318 [12:56:15<8:23:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7473/12318 [12:56:15<8:23:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7474/12318 [12:56:24<8:23:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7474/12318 [12:56:24<8:23:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7475/12318 [12:56:27<8:23:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7475/12318 [12:56:27<8:23:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7476/12318 [12:56:32<8:22:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7476/12318 [12:56:32<8:22:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7477/12318 [12:56:34<8:22:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7477/12318 [12:56:34<8:22:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7478/12318 [12:56:39<8:22:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7478/12318 [12:56:39<8:22:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7479/12318 [12:56:43<8:22:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7479/12318 [12:56:43<8:22:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7480/12318 [12:56:50<8:22:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7480/12318 [12:56:50<8:22:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7481/12318 [12:56:54<8:22:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7481/12318 [12:56:54<8:22:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7482/12318 [12:57:02<8:22:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7482/12318 [12:57:02<8:22:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7483/12318 [12:57:07<8:22:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7483/12318 [12:57:07<8:22:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7484/12318 [12:57:12<8:22:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7484/12318 [12:57:12<8:22:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7485/12318 [12:57:19<8:21:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7485/12318 [12:57:19<8:21:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7486/12318 [12:57:24<8:21:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7486/12318 [12:57:24<8:21:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7487/12318 [12:57:26<8:21:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7487/12318 [12:57:26<8:21:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7488/12318 [12:57:43<8:21:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7488/12318 [12:57:43<8:21:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7489/12318 [12:57:45<8:21:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7489/12318 [12:57:45<8:21:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7490/12318 [12:57:52<8:21:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7490/12318 [12:57:52<8:21:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7491/12318 [12:57:56<8:21:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7491/12318 [12:57:56<8:21:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7492/12318 [12:58:01<8:21:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7492/12318 [12:58:01<8:21:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7493/12318 [12:58:02<8:21:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7493/12318 [12:58:02<8:21:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7494/12318 [12:58:06<8:20:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7494/12318 [12:58:06<8:20:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7495/12318 [12:58:14<8:20:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7495/12318 [12:58:14<8:20:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7496/12318 [12:58:17<8:20:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7496/12318 [12:58:17<8:20:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7497/12318 [12:58:23<8:20:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7497/12318 [12:58:23<8:20:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7498/12318 [12:58:28<8:20:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7498/12318 [12:58:28<8:20:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7499/12318 [12:58:36<8:20:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7499/12318 [12:58:36<8:20:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7500/12318 [12:58:40<8:20:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7500/12318 [12:58:40<8:20:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7501/12318 [12:58:48<8:20:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7501/12318 [12:58:48<8:20:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7502/12318 [12:58:52<8:20:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7502/12318 [12:58:52<8:20:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7503/12318 [12:58:53<8:19:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7503/12318 [12:58:53<8:19:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7504/12318 [12:59:00<8:19:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7504/12318 [12:59:00<8:19:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7505/12318 [12:59:05<8:19:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7505/12318 [12:59:05<8:19:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7506/12318 [12:59:08<8:19:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7506/12318 [12:59:08<8:19:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7507/12318 [12:59:14<8:19:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7507/12318 [12:59:14<8:19:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7508/12318 [12:59:22<8:19:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7508/12318 [12:59:22<8:19:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7509/12318 [12:59:27<8:19:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7509/12318 [12:59:27<8:19:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7510/12318 [12:59:33<8:19:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7510/12318 [12:59:33<8:19:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7511/12318 [12:59:35<8:18:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7511/12318 [12:59:35<8:18:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7512/12318 [12:59:42<8:18:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7512/12318 [12:59:42<8:18:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7513/12318 [12:59:49<8:18:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7513/12318 [12:59:49<8:18:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7514/12318 [12:59:55<8:18:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7514/12318 [12:59:55<8:18:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7515/12318 [13:00:02<8:18:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7515/12318 [13:00:02<8:18:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7516/12318 [13:00:07<8:18:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7516/12318 [13:00:07<8:18:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7517/12318 [13:00:15<8:18:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7517/12318 [13:00:15<8:18:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7518/12318 [13:00:24<8:18:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7518/12318 [13:00:24<8:18:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7519/12318 [13:00:30<8:18:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7519/12318 [13:00:30<8:18:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7520/12318 [13:01:13<8:18:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7520/12318 [13:01:13<8:18:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7521/12318 [13:01:16<8:18:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7521/12318 [13:01:16<8:18:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7522/12318 [13:01:24<8:18:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7522/12318 [13:01:24<8:18:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7523/12318 [13:01:26<8:18:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7523/12318 [13:01:26<8:18:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7524/12318 [13:01:34<8:17:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7524/12318 [13:01:34<8:17:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7525/12318 [13:01:39<8:17:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7525/12318 [13:01:39<8:17:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7526/12318 [13:01:43<8:17:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7526/12318 [13:01:43<8:17:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7527/12318 [13:01:49<8:17:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7527/12318 [13:01:49<8:17:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7528/12318 [13:01:55<8:17:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7528/12318 [13:01:55<8:17:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7529/12318 [13:02:00<8:17:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7529/12318 [13:02:00<8:17:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7530/12318 [13:02:01<8:17:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7530/12318 [13:02:01<8:17:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7531/12318 [13:02:07<8:17:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7531/12318 [13:02:07<8:17:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7532/12318 [13:02:13<8:17:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7532/12318 [13:02:13<8:17:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7533/12318 [13:02:21<8:16:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7533/12318 [13:02:21<8:16:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7534/12318 [13:02:23<8:16:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7534/12318 [13:02:23<8:16:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7535/12318 [13:02:27<8:16:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7535/12318 [13:02:27<8:16:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7536/12318 [13:02:28<8:16:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7536/12318 [13:02:28<8:16:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7537/12318 [13:02:31<8:16:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7537/12318 [13:02:31<8:16:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7538/12318 [13:02:34<8:16:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7538/12318 [13:02:34<8:16:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7539/12318 [13:02:35<8:16:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7539/12318 [13:02:35<8:16:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7540/12318 [13:02:38<8:15:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7540/12318 [13:02:38<8:15:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7541/12318 [13:02:47<8:15:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7541/12318 [13:02:47<8:15:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7542/12318 [13:02:53<8:15:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7542/12318 [13:02:53<8:15:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7543/12318 [13:02:56<8:15:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7543/12318 [13:02:56<8:15:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7544/12318 [13:03:04<8:15:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7544/12318 [13:03:04<8:15:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7545/12318 [13:03:13<8:15:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7545/12318 [13:03:13<8:15:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7546/12318 [13:03:19<8:15:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7546/12318 [13:03:19<8:15:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7547/12318 [13:03:25<8:15:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7547/12318 [13:03:25<8:15:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7548/12318 [13:03:27<8:15:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7548/12318 [13:03:27<8:15:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7549/12318 [13:03:29<8:14:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7549/12318 [13:03:29<8:14:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7550/12318 [13:03:36<8:14:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7550/12318 [13:03:36<8:14:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7551/12318 [13:03:39<8:14:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7551/12318 [13:03:39<8:14:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7552/12318 [13:04:43<8:15:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7552/12318 [13:04:43<8:15:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7553/12318 [13:04:44<8:15:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7553/12318 [13:04:44<8:15:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7554/12318 [13:04:50<8:14:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7554/12318 [13:04:50<8:14:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7555/12318 [13:04:55<8:14:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7555/12318 [13:04:55<8:14:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7556/12318 [13:05:03<8:14:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7556/12318 [13:05:03<8:14:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7557/12318 [13:05:09<8:14:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7557/12318 [13:05:09<8:14:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7558/12318 [13:05:13<8:14:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7558/12318 [13:05:13<8:14:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7559/12318 [13:05:16<8:14:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7559/12318 [13:05:16<8:14:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7560/12318 [13:05:18<8:14:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7560/12318 [13:05:18<8:14:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7561/12318 [13:05:25<8:14:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7561/12318 [13:05:25<8:14:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7562/12318 [13:05:28<8:14:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7562/12318 [13:05:28<8:14:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7563/12318 [13:05:34<8:13:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7563/12318 [13:05:34<8:13:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7564/12318 [13:05:42<8:13:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7564/12318 [13:05:42<8:13:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7565/12318 [13:05:46<8:13:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7565/12318 [13:05:46<8:13:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7566/12318 [13:05:51<8:13:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7566/12318 [13:05:51<8:13:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7567/12318 [13:06:00<8:13:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7567/12318 [13:06:00<8:13:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7568/12318 [13:06:03<8:13:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7568/12318 [13:06:03<8:13:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7569/12318 [13:06:09<8:13:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7569/12318 [13:06:09<8:13:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7570/12318 [13:06:14<8:13:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7570/12318 [13:06:14<8:13:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7571/12318 [13:06:16<8:12:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7571/12318 [13:06:16<8:12:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7572/12318 [13:06:18<8:12:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7572/12318 [13:06:18<8:12:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7573/12318 [13:06:23<8:12:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7573/12318 [13:06:23<8:12:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7574/12318 [13:06:29<8:12:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7574/12318 [13:06:29<8:12:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  61%|▌| 7575/12318 [13:06:32<8:12:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  61%|▌| 7575/12318 [13:06:32<8:12:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7576/12318 [13:06:39<8:12:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7576/12318 [13:06:39<8:12:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7577/12318 [13:06:42<8:12:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7577/12318 [13:06:42<8:12:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7578/12318 [13:06:49<8:12:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7578/12318 [13:06:49<8:12:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7579/12318 [13:06:53<8:12:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7579/12318 [13:06:53<8:12:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7580/12318 [13:07:02<8:11:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7580/12318 [13:07:02<8:11:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7581/12318 [13:07:10<8:11:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7581/12318 [13:07:10<8:11:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7582/12318 [13:07:16<8:11:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7582/12318 [13:07:16<8:11:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7583/12318 [13:07:20<8:11:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7583/12318 [13:07:20<8:11:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7584/12318 [13:08:11<8:11:59,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7584/12318 [13:08:11<8:11:59,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7585/12318 [13:08:17<8:11:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7585/12318 [13:08:17<8:11:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7586/12318 [13:08:24<8:11:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7586/12318 [13:08:24<8:11:47,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7587/12318 [13:08:28<8:11:39,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7587/12318 [13:08:28<8:11:39,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7588/12318 [13:08:31<8:11:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7588/12318 [13:08:31<8:11:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7589/12318 [13:08:35<8:11:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7589/12318 [13:08:35<8:11:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7590/12318 [13:08:39<8:11:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7590/12318 [13:08:39<8:11:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7591/12318 [13:08:43<8:11:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7591/12318 [13:08:43<8:11:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7592/12318 [13:08:49<8:11:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7592/12318 [13:08:49<8:11:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7593/12318 [13:08:52<8:10:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7593/12318 [13:08:52<8:10:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7594/12318 [13:09:00<8:10:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7594/12318 [13:09:00<8:10:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7595/12318 [13:09:05<8:10:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7595/12318 [13:09:05<8:10:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7596/12318 [13:09:10<8:10:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7596/12318 [13:09:10<8:10:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7597/12318 [13:09:19<8:10:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7597/12318 [13:09:19<8:10:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7598/12318 [13:09:23<8:10:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7598/12318 [13:09:23<8:10:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7599/12318 [13:09:28<8:10:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7599/12318 [13:09:28<8:10:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7600/12318 [13:09:30<8:10:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7600/12318 [13:09:30<8:10:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7601/12318 [13:09:36<8:10:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7601/12318 [13:09:36<8:10:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7602/12318 [13:09:42<8:09:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7602/12318 [13:09:42<8:09:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7603/12318 [13:09:44<8:09:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7603/12318 [13:09:44<8:09:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7604/12318 [13:09:50<8:09:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7604/12318 [13:09:50<8:09:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7605/12318 [13:09:59<8:09:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7605/12318 [13:09:59<8:09:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7606/12318 [13:10:08<8:09:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7606/12318 [13:10:08<8:09:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7607/12318 [13:10:15<8:09:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7607/12318 [13:10:15<8:09:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7608/12318 [13:10:22<8:09:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7608/12318 [13:10:22<8:09:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7609/12318 [13:10:30<8:09:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7609/12318 [13:10:30<8:09:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7610/12318 [13:10:35<8:09:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7610/12318 [13:10:35<8:09:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7611/12318 [13:10:42<8:09:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7611/12318 [13:10:42<8:09:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7612/12318 [13:10:50<8:08:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7612/12318 [13:10:50<8:08:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7613/12318 [13:10:52<8:08:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7613/12318 [13:10:52<8:08:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7614/12318 [13:10:57<8:08:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7614/12318 [13:10:57<8:08:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7615/12318 [13:11:02<8:08:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7615/12318 [13:11:02<8:08:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7616/12318 [13:11:29<8:08:39,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7616/12318 [13:11:29<8:08:39,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7617/12318 [13:11:35<8:08:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7617/12318 [13:11:35<8:08:32,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7618/12318 [13:11:41<8:08:26,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7618/12318 [13:11:41<8:08:26,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7619/12318 [13:11:49<8:08:21,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7619/12318 [13:11:49<8:08:21,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7620/12318 [13:11:58<8:08:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7620/12318 [13:11:58<8:08:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7621/12318 [13:12:01<8:08:08,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7621/12318 [13:12:01<8:08:08,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7622/12318 [13:12:06<8:08:01,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7622/12318 [13:12:06<8:08:01,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7623/12318 [13:12:10<8:07:53,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7623/12318 [13:12:10<8:07:53,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7624/12318 [13:12:16<8:07:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7624/12318 [13:12:16<8:07:47,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7625/12318 [13:12:25<8:07:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7625/12318 [13:12:25<8:07:43,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7626/12318 [13:12:34<8:07:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7626/12318 [13:12:34<8:07:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7627/12318 [13:12:37<8:07:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7627/12318 [13:12:37<8:07:30,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7628/12318 [13:12:43<8:07:23,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7628/12318 [13:12:43<8:07:23,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7629/12318 [13:12:48<8:07:16,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7629/12318 [13:12:48<8:07:16,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7630/12318 [13:12:53<8:07:09,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7630/12318 [13:12:53<8:07:09,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7631/12318 [13:13:00<8:07:03,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7631/12318 [13:13:00<8:07:03,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7632/12318 [13:13:01<8:06:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7632/12318 [13:13:01<8:06:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7633/12318 [13:13:10<8:06:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7633/12318 [13:13:10<8:06:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7634/12318 [13:13:18<8:06:45,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7634/12318 [13:13:18<8:06:45,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7635/12318 [13:13:25<8:06:39,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7635/12318 [13:13:25<8:06:39,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7636/12318 [13:13:26<8:06:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7636/12318 [13:13:26<8:06:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7637/12318 [13:13:28<8:06:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7637/12318 [13:13:28<8:06:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7638/12318 [13:13:33<8:06:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7638/12318 [13:13:33<8:06:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7639/12318 [13:13:40<8:06:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7639/12318 [13:13:40<8:06:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7640/12318 [13:13:46<8:06:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7640/12318 [13:13:46<8:06:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7641/12318 [13:13:48<8:05:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7641/12318 [13:13:48<8:05:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7642/12318 [13:13:51<8:05:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7642/12318 [13:13:51<8:05:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7643/12318 [13:13:56<8:05:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7643/12318 [13:13:56<8:05:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7644/12318 [13:14:01<8:05:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7644/12318 [13:14:01<8:05:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7645/12318 [13:14:10<8:05:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7645/12318 [13:14:10<8:05:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7646/12318 [13:14:14<8:05:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7646/12318 [13:14:14<8:05:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7647/12318 [13:14:15<8:05:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7647/12318 [13:14:15<8:05:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7648/12318 [13:14:38<8:05:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7648/12318 [13:14:38<8:05:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7649/12318 [13:14:46<8:05:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7649/12318 [13:14:46<8:05:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7650/12318 [13:14:47<8:04:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7650/12318 [13:14:47<8:04:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7651/12318 [13:14:53<8:04:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7651/12318 [13:14:53<8:04:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7652/12318 [13:14:54<8:04:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7652/12318 [13:14:54<8:04:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7653/12318 [13:14:59<8:04:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7653/12318 [13:14:59<8:04:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7654/12318 [13:15:05<8:04:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7654/12318 [13:15:05<8:04:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7655/12318 [13:15:13<8:04:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7655/12318 [13:15:13<8:04:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7656/12318 [13:15:15<8:04:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7656/12318 [13:15:15<8:04:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7657/12318 [13:15:16<8:04:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7657/12318 [13:15:16<8:04:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7658/12318 [13:15:21<8:03:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7658/12318 [13:15:21<8:03:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7659/12318 [13:15:23<8:03:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7659/12318 [13:15:23<8:03:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7660/12318 [13:15:24<8:03:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7660/12318 [13:15:24<8:03:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7661/12318 [13:15:29<8:03:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7661/12318 [13:15:29<8:03:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7662/12318 [13:15:38<8:03:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7662/12318 [13:15:38<8:03:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7663/12318 [13:15:46<8:03:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7663/12318 [13:15:46<8:03:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7664/12318 [13:15:51<8:03:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7664/12318 [13:15:51<8:03:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7665/12318 [13:15:55<8:03:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7665/12318 [13:15:55<8:03:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7666/12318 [13:16:02<8:03:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7666/12318 [13:16:02<8:03:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7667/12318 [13:16:07<8:02:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7667/12318 [13:16:07<8:02:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7668/12318 [13:16:09<8:02:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7668/12318 [13:16:09<8:02:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7669/12318 [13:16:11<8:02:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7669/12318 [13:16:11<8:02:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7670/12318 [13:16:16<8:02:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7670/12318 [13:16:16<8:02:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7671/12318 [13:16:24<8:02:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7671/12318 [13:16:24<8:02:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7672/12318 [13:16:27<8:02:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7672/12318 [13:16:27<8:02:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7673/12318 [13:16:31<8:02:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7673/12318 [13:16:31<8:02:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7674/12318 [13:16:36<8:02:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7674/12318 [13:16:36<8:02:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7675/12318 [13:16:42<8:01:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7675/12318 [13:16:42<8:01:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7676/12318 [13:16:47<8:01:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7676/12318 [13:16:47<8:01:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7677/12318 [13:16:55<8:01:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7677/12318 [13:16:55<8:01:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7678/12318 [13:16:59<8:01:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7678/12318 [13:16:59<8:01:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7679/12318 [13:17:08<8:01:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7679/12318 [13:17:08<8:01:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7680/12318 [13:17:52<8:01:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7680/12318 [13:17:52<8:01:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7681/12318 [13:17:53<8:01:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7681/12318 [13:17:53<8:01:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7682/12318 [13:17:55<8:01:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7682/12318 [13:17:55<8:01:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7683/12318 [13:17:58<8:01:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7683/12318 [13:17:58<8:01:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7684/12318 [13:17:59<8:01:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7684/12318 [13:17:59<8:01:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7685/12318 [13:18:03<8:01:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7685/12318 [13:18:03<8:01:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7686/12318 [13:18:06<8:00:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7686/12318 [13:18:06<8:00:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7687/12318 [13:18:12<8:00:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7687/12318 [13:18:12<8:00:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7688/12318 [13:18:15<8:00:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7688/12318 [13:18:15<8:00:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7689/12318 [13:18:20<8:00:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7689/12318 [13:18:20<8:00:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7690/12318 [13:18:29<8:00:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7690/12318 [13:18:29<8:00:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7691/12318 [13:18:31<8:00:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7691/12318 [13:18:31<8:00:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7692/12318 [13:18:37<8:00:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7692/12318 [13:18:37<8:00:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7693/12318 [13:18:45<8:00:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7693/12318 [13:18:45<8:00:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7694/12318 [13:18:48<8:00:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7694/12318 [13:18:48<8:00:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7695/12318 [13:18:54<7:59:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7695/12318 [13:18:54<7:59:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7696/12318 [13:18:56<7:59:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7696/12318 [13:18:56<7:59:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7697/12318 [13:19:02<7:59:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7697/12318 [13:19:02<7:59:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  62%|▌| 7698/12318 [13:19:10<7:59:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  62%|▌| 7698/12318 [13:19:10<7:59:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7699/12318 [13:19:19<7:59:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7699/12318 [13:19:19<7:59:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7700/12318 [13:19:22<7:59:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7700/12318 [13:19:22<7:59:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7701/12318 [13:19:28<7:59:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7701/12318 [13:19:28<7:59:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7702/12318 [13:19:37<7:59:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7702/12318 [13:19:37<7:59:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7703/12318 [13:19:42<7:59:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7703/12318 [13:19:42<7:59:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7704/12318 [13:19:49<7:59:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7704/12318 [13:19:49<7:59:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7705/12318 [13:19:50<7:58:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7705/12318 [13:19:50<7:58:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7706/12318 [13:19:52<7:58:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7706/12318 [13:19:52<7:58:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7707/12318 [13:19:58<7:58:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7707/12318 [13:19:58<7:58:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7708/12318 [13:20:01<7:58:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7708/12318 [13:20:01<7:58:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7709/12318 [13:20:04<7:58:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7709/12318 [13:20:04<7:58:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7710/12318 [13:20:11<7:58:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7710/12318 [13:20:11<7:58:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7711/12318 [13:20:13<7:58:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7711/12318 [13:20:13<7:58:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7712/12318 [13:21:14<7:58:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7712/12318 [13:21:14<7:58:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7713/12318 [13:21:17<7:58:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7713/12318 [13:21:17<7:58:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7714/12318 [13:21:24<7:58:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7714/12318 [13:21:24<7:58:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7715/12318 [13:21:27<7:58:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7715/12318 [13:21:27<7:58:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7716/12318 [13:21:31<7:58:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7716/12318 [13:21:31<7:58:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7717/12318 [13:21:36<7:57:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7717/12318 [13:21:36<7:57:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7718/12318 [13:21:45<7:57:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7718/12318 [13:21:45<7:57:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7719/12318 [13:21:47<7:57:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7719/12318 [13:21:47<7:57:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7720/12318 [13:21:51<7:57:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7720/12318 [13:21:51<7:57:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7721/12318 [13:21:56<7:57:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7721/12318 [13:21:56<7:57:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7722/12318 [13:22:05<7:57:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7722/12318 [13:22:05<7:57:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7723/12318 [13:22:12<7:57:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7723/12318 [13:22:12<7:57:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7724/12318 [13:22:20<7:57:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7724/12318 [13:22:20<7:57:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7725/12318 [13:22:29<7:57:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7725/12318 [13:22:29<7:57:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7726/12318 [13:22:32<7:57:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7726/12318 [13:22:32<7:57:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7727/12318 [13:22:39<7:56:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7727/12318 [13:22:39<7:56:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7728/12318 [13:22:42<7:56:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7728/12318 [13:22:42<7:56:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7729/12318 [13:22:49<7:56:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7729/12318 [13:22:49<7:56:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7730/12318 [13:22:57<7:56:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7730/12318 [13:22:57<7:56:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7731/12318 [13:23:01<7:56:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7731/12318 [13:23:01<7:56:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7732/12318 [13:23:08<7:56:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7732/12318 [13:23:08<7:56:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7733/12318 [13:23:14<7:56:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7733/12318 [13:23:14<7:56:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7734/12318 [13:23:20<7:56:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7734/12318 [13:23:20<7:56:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7735/12318 [13:23:28<7:56:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7735/12318 [13:23:28<7:56:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7736/12318 [13:23:36<7:55:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7736/12318 [13:23:36<7:55:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7737/12318 [13:23:37<7:55:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7737/12318 [13:23:37<7:55:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7738/12318 [13:23:46<7:55:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7738/12318 [13:23:46<7:55:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7739/12318 [13:23:50<7:55:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7739/12318 [13:23:50<7:55:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7740/12318 [13:23:58<7:55:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7740/12318 [13:23:58<7:55:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7741/12318 [13:24:06<7:55:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7741/12318 [13:24:06<7:55:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7742/12318 [13:24:14<7:55:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7742/12318 [13:24:14<7:55:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7743/12318 [13:24:22<7:55:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7743/12318 [13:24:22<7:55:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7744/12318 [13:24:42<7:55:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7744/12318 [13:24:42<7:55:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7745/12318 [13:24:48<7:55:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7745/12318 [13:24:48<7:55:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7746/12318 [13:24:53<7:55:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7746/12318 [13:24:53<7:55:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7747/12318 [13:24:59<7:54:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7747/12318 [13:24:59<7:54:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7748/12318 [13:25:07<7:54:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7748/12318 [13:25:07<7:54:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7749/12318 [13:25:09<7:54:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7749/12318 [13:25:09<7:54:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7750/12318 [13:25:14<7:54:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7750/12318 [13:25:14<7:54:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7751/12318 [13:25:20<7:54:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7751/12318 [13:25:20<7:54:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7752/12318 [13:25:25<7:54:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7752/12318 [13:25:25<7:54:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7753/12318 [13:25:28<7:54:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7753/12318 [13:25:28<7:54:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7754/12318 [13:25:31<7:54:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7754/12318 [13:25:31<7:54:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7755/12318 [13:25:37<7:54:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7755/12318 [13:25:37<7:54:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7756/12318 [13:25:40<7:53:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7756/12318 [13:25:40<7:53:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7757/12318 [13:25:47<7:53:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7757/12318 [13:25:47<7:53:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7758/12318 [13:25:51<7:53:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7758/12318 [13:25:51<7:53:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7759/12318 [13:25:53<7:53:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7759/12318 [13:25:53<7:53:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7760/12318 [13:26:00<7:53:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7760/12318 [13:26:00<7:53:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7761/12318 [13:26:07<7:53:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7761/12318 [13:26:07<7:53:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7762/12318 [13:26:16<7:53:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7762/12318 [13:26:16<7:53:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7763/12318 [13:26:23<7:53:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7763/12318 [13:26:23<7:53:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7764/12318 [13:26:30<7:53:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7764/12318 [13:26:30<7:53:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7765/12318 [13:26:39<7:52:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7765/12318 [13:26:39<7:52:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7766/12318 [13:26:44<7:52:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7766/12318 [13:26:44<7:52:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7767/12318 [13:26:47<7:52:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7767/12318 [13:26:47<7:52:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7768/12318 [13:26:54<7:52:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7768/12318 [13:26:54<7:52:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7769/12318 [13:26:59<7:52:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7769/12318 [13:26:59<7:52:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7770/12318 [13:27:04<7:52:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7770/12318 [13:27:04<7:52:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7771/12318 [13:27:13<7:52:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7771/12318 [13:27:13<7:52:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7772/12318 [13:27:19<7:52:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7772/12318 [13:27:19<7:52:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7773/12318 [13:27:27<7:52:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7773/12318 [13:27:27<7:52:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7774/12318 [13:27:32<7:52:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7774/12318 [13:27:32<7:52:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7775/12318 [13:27:40<7:51:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7775/12318 [13:27:40<7:51:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7776/12318 [13:27:57<7:51:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7776/12318 [13:27:57<7:51:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7777/12318 [13:28:04<7:51:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7777/12318 [13:28:04<7:51:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7778/12318 [13:28:13<7:51:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7778/12318 [13:28:13<7:51:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7779/12318 [13:28:19<7:51:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7779/12318 [13:28:19<7:51:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7780/12318 [13:28:23<7:51:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7780/12318 [13:28:23<7:51:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7781/12318 [13:28:32<7:51:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7781/12318 [13:28:32<7:51:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7782/12318 [13:28:36<7:51:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7782/12318 [13:28:36<7:51:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7783/12318 [13:28:40<7:51:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7783/12318 [13:28:40<7:51:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7784/12318 [13:28:47<7:51:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7784/12318 [13:28:47<7:51:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7785/12318 [13:28:54<7:51:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7785/12318 [13:28:54<7:51:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7786/12318 [13:29:02<7:50:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7786/12318 [13:29:02<7:50:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7787/12318 [13:29:06<7:50:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7787/12318 [13:29:06<7:50:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7788/12318 [13:29:14<7:50:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7788/12318 [13:29:14<7:50:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7789/12318 [13:29:18<7:50:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7789/12318 [13:29:18<7:50:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7790/12318 [13:29:27<7:50:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7790/12318 [13:29:27<7:50:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7791/12318 [13:29:35<7:50:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7791/12318 [13:29:35<7:50:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7792/12318 [13:29:40<7:50:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7792/12318 [13:29:40<7:50:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7793/12318 [13:29:45<7:50:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7793/12318 [13:29:45<7:50:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7794/12318 [13:29:49<7:50:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7794/12318 [13:29:49<7:50:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7795/12318 [13:29:53<7:49:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7795/12318 [13:29:53<7:49:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7796/12318 [13:29:59<7:49:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7796/12318 [13:29:59<7:49:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7797/12318 [13:30:02<7:49:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7797/12318 [13:30:02<7:49:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7798/12318 [13:30:10<7:49:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7798/12318 [13:30:10<7:49:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7799/12318 [13:30:13<7:49:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7799/12318 [13:30:13<7:49:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7800/12318 [13:30:22<7:49:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7800/12318 [13:30:22<7:49:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7801/12318 [13:30:24<7:49:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7801/12318 [13:30:24<7:49:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7802/12318 [13:30:29<7:49:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7802/12318 [13:30:29<7:49:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7803/12318 [13:30:37<7:49:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7803/12318 [13:30:37<7:49:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7804/12318 [13:30:38<7:48:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7804/12318 [13:30:38<7:48:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7805/12318 [13:30:40<7:48:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7805/12318 [13:30:40<7:48:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7806/12318 [13:30:44<7:48:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7806/12318 [13:30:44<7:48:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7807/12318 [13:30:50<7:48:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7807/12318 [13:30:50<7:48:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7808/12318 [13:31:06<7:48:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7808/12318 [13:31:06<7:48:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7809/12318 [13:31:11<7:48:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7809/12318 [13:31:11<7:48:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7810/12318 [13:31:14<7:48:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7810/12318 [13:31:14<7:48:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7811/12318 [13:31:17<7:48:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7811/12318 [13:31:17<7:48:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7812/12318 [13:31:20<7:47:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7812/12318 [13:31:20<7:47:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7813/12318 [13:31:24<7:47:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7813/12318 [13:31:24<7:47:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7814/12318 [13:31:30<7:47:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7814/12318 [13:31:30<7:47:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7815/12318 [13:31:36<7:47:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7815/12318 [13:31:36<7:47:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7816/12318 [13:31:40<7:47:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7816/12318 [13:31:40<7:47:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7817/12318 [13:31:41<7:47:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7817/12318 [13:31:41<7:47:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7818/12318 [13:31:50<7:47:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7818/12318 [13:31:50<7:47:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7819/12318 [13:31:55<7:47:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7819/12318 [13:31:55<7:47:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7820/12318 [13:32:04<7:47:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7820/12318 [13:32:04<7:47:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  63%|▋| 7821/12318 [13:32:06<7:46:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  63%|▋| 7821/12318 [13:32:06<7:46:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7822/12318 [13:32:09<7:46:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7822/12318 [13:32:09<7:46:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7823/12318 [13:32:18<7:46:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7823/12318 [13:32:18<7:46:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7824/12318 [13:32:25<7:46:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7824/12318 [13:32:25<7:46:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7825/12318 [13:32:28<7:46:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7825/12318 [13:32:28<7:46:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7826/12318 [13:32:36<7:46:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7826/12318 [13:32:36<7:46:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7827/12318 [13:32:38<7:46:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7827/12318 [13:32:38<7:46:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7828/12318 [13:32:45<7:46:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7828/12318 [13:32:45<7:46:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7829/12318 [13:32:52<7:46:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7829/12318 [13:32:52<7:46:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7830/12318 [13:32:58<7:45:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7830/12318 [13:32:58<7:45:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7831/12318 [13:33:04<7:45:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7831/12318 [13:33:04<7:45:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7832/12318 [13:33:06<7:45:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7832/12318 [13:33:06<7:45:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7833/12318 [13:33:12<7:45:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7833/12318 [13:33:12<7:45:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7834/12318 [13:33:19<7:45:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7834/12318 [13:33:19<7:45:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7835/12318 [13:33:26<7:45:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7835/12318 [13:33:26<7:45:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7836/12318 [13:33:35<7:45:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7836/12318 [13:33:35<7:45:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7837/12318 [13:33:39<7:45:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7837/12318 [13:33:39<7:45:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7838/12318 [13:33:48<7:45:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7838/12318 [13:33:48<7:45:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7839/12318 [13:33:52<7:45:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7839/12318 [13:33:52<7:45:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7840/12318 [13:34:19<7:45:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7840/12318 [13:34:19<7:45:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7841/12318 [13:34:23<7:44:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7841/12318 [13:34:23<7:44:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7842/12318 [13:34:31<7:44:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7842/12318 [13:34:31<7:44:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7843/12318 [13:34:39<7:44:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7843/12318 [13:34:39<7:44:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7844/12318 [13:34:47<7:44:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7844/12318 [13:34:47<7:44:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7845/12318 [13:34:50<7:44:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7845/12318 [13:34:50<7:44:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7846/12318 [13:34:51<7:44:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7846/12318 [13:34:51<7:44:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7847/12318 [13:34:56<7:44:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7847/12318 [13:34:56<7:44:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7848/12318 [13:35:03<7:44:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7848/12318 [13:35:03<7:44:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7849/12318 [13:35:06<7:44:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7849/12318 [13:35:06<7:44:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7850/12318 [13:35:08<7:43:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7850/12318 [13:35:08<7:43:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7851/12318 [13:35:17<7:43:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7851/12318 [13:35:17<7:43:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7852/12318 [13:35:19<7:43:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7852/12318 [13:35:19<7:43:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7853/12318 [13:35:25<7:43:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7853/12318 [13:35:25<7:43:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7854/12318 [13:35:30<7:43:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7854/12318 [13:35:30<7:43:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7855/12318 [13:35:37<7:43:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7855/12318 [13:35:37<7:43:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7856/12318 [13:35:46<7:43:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7856/12318 [13:35:46<7:43:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7857/12318 [13:35:51<7:43:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7857/12318 [13:35:51<7:43:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7858/12318 [13:35:55<7:43:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7858/12318 [13:35:55<7:43:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7859/12318 [13:35:56<7:42:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7859/12318 [13:35:56<7:42:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7860/12318 [13:36:03<7:42:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7860/12318 [13:36:03<7:42:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7861/12318 [13:36:10<7:42:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7861/12318 [13:36:10<7:42:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7862/12318 [13:36:15<7:42:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7862/12318 [13:36:15<7:42:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7863/12318 [13:36:22<7:42:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7863/12318 [13:36:22<7:42:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7864/12318 [13:36:24<7:42:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7864/12318 [13:36:24<7:42:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7865/12318 [13:36:26<7:42:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7865/12318 [13:36:26<7:42:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7866/12318 [13:36:30<7:42:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7866/12318 [13:36:30<7:42:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7867/12318 [13:36:39<7:42:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7867/12318 [13:36:39<7:42:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7868/12318 [13:36:45<7:41:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7868/12318 [13:36:45<7:41:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7869/12318 [13:36:47<7:41:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7869/12318 [13:36:47<7:41:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7870/12318 [13:36:52<7:41:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7870/12318 [13:36:52<7:41:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7871/12318 [13:37:00<7:41:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7871/12318 [13:37:00<7:41:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7872/12318 [13:37:46<7:41:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7872/12318 [13:37:46<7:41:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7873/12318 [13:37:51<7:41:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7873/12318 [13:37:51<7:41:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7874/12318 [13:37:58<7:41:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7874/12318 [13:37:58<7:41:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7875/12318 [13:38:04<7:41:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7875/12318 [13:38:04<7:41:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7876/12318 [13:38:09<7:41:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7876/12318 [13:38:09<7:41:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7877/12318 [13:38:16<7:41:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7877/12318 [13:38:16<7:41:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7878/12318 [13:38:21<7:41:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7878/12318 [13:38:21<7:41:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7879/12318 [13:38:24<7:41:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7879/12318 [13:38:24<7:41:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7880/12318 [13:38:27<7:40:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7880/12318 [13:38:27<7:40:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7881/12318 [13:38:32<7:40:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7881/12318 [13:38:32<7:40:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7882/12318 [13:38:34<7:40:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7882/12318 [13:38:34<7:40:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7883/12318 [13:38:43<7:40:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7883/12318 [13:38:43<7:40:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7884/12318 [13:38:50<7:40:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7884/12318 [13:38:50<7:40:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7885/12318 [13:38:56<7:40:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7885/12318 [13:38:56<7:40:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7886/12318 [13:39:02<7:40:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7886/12318 [13:39:02<7:40:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7887/12318 [13:39:11<7:40:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7887/12318 [13:39:11<7:40:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7888/12318 [13:39:17<7:40:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7888/12318 [13:39:17<7:40:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7889/12318 [13:39:21<7:40:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7889/12318 [13:39:21<7:40:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7890/12318 [13:39:26<7:39:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7890/12318 [13:39:26<7:39:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7891/12318 [13:39:32<7:39:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7891/12318 [13:39:32<7:39:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7892/12318 [13:39:36<7:39:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7892/12318 [13:39:36<7:39:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7893/12318 [13:39:45<7:39:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7893/12318 [13:39:45<7:39:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7894/12318 [13:39:52<7:39:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7894/12318 [13:39:52<7:39:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7895/12318 [13:39:58<7:39:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7895/12318 [13:39:58<7:39:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7896/12318 [13:40:03<7:39:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7896/12318 [13:40:03<7:39:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7897/12318 [13:40:09<7:39:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7897/12318 [13:40:09<7:39:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7898/12318 [13:40:16<7:39:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7898/12318 [13:40:16<7:39:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7899/12318 [13:40:25<7:38:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7899/12318 [13:40:25<7:38:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7900/12318 [13:40:26<7:38:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7900/12318 [13:40:26<7:38:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7901/12318 [13:40:33<7:38:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7901/12318 [13:40:33<7:38:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7902/12318 [13:40:36<7:38:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7902/12318 [13:40:36<7:38:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7903/12318 [13:40:43<7:38:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7903/12318 [13:40:43<7:38:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7904/12318 [13:41:12<7:38:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7904/12318 [13:41:12<7:38:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7905/12318 [13:41:13<7:38:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7905/12318 [13:41:13<7:38:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7906/12318 [13:41:22<7:38:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7906/12318 [13:41:22<7:38:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7907/12318 [13:41:27<7:38:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7907/12318 [13:41:27<7:38:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7908/12318 [13:41:33<7:38:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7908/12318 [13:41:33<7:38:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7909/12318 [13:41:41<7:38:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7909/12318 [13:41:41<7:38:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7910/12318 [13:41:50<7:37:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7910/12318 [13:41:50<7:37:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7911/12318 [13:41:53<7:37:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7911/12318 [13:41:53<7:37:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7912/12318 [13:42:00<7:37:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7912/12318 [13:42:00<7:37:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7913/12318 [13:42:01<7:37:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7913/12318 [13:42:01<7:37:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7914/12318 [13:42:08<7:37:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7914/12318 [13:42:08<7:37:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7915/12318 [13:42:11<7:37:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7915/12318 [13:42:11<7:37:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7916/12318 [13:42:18<7:37:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7916/12318 [13:42:18<7:37:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7917/12318 [13:42:21<7:37:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7917/12318 [13:42:21<7:37:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7918/12318 [13:42:28<7:37:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7918/12318 [13:42:28<7:37:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7919/12318 [13:42:33<7:36:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7919/12318 [13:42:33<7:36:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7920/12318 [13:42:40<7:36:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7920/12318 [13:42:40<7:36:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7921/12318 [13:42:45<7:36:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7921/12318 [13:42:45<7:36:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7922/12318 [13:42:54<7:36:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7922/12318 [13:42:54<7:36:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7923/12318 [13:43:02<7:36:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7923/12318 [13:43:02<7:36:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7924/12318 [13:43:11<7:36:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7924/12318 [13:43:11<7:36:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7925/12318 [13:43:12<7:36:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7925/12318 [13:43:12<7:36:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7926/12318 [13:43:18<7:36:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7926/12318 [13:43:18<7:36:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7927/12318 [13:43:21<7:36:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7927/12318 [13:43:21<7:36:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7928/12318 [13:43:24<7:35:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7928/12318 [13:43:24<7:35:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7929/12318 [13:43:25<7:35:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7929/12318 [13:43:25<7:35:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7930/12318 [13:43:32<7:35:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7930/12318 [13:43:32<7:35:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7931/12318 [13:43:41<7:35:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7931/12318 [13:43:41<7:35:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7932/12318 [13:43:44<7:35:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7932/12318 [13:43:44<7:35:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7933/12318 [13:43:48<7:35:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7933/12318 [13:43:48<7:35:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7934/12318 [13:43:57<7:35:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7934/12318 [13:43:57<7:35:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7935/12318 [13:44:03<7:35:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7935/12318 [13:44:03<7:35:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7936/12318 [13:44:26<7:35:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7936/12318 [13:44:26<7:35:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7937/12318 [13:44:35<7:35:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7937/12318 [13:44:35<7:35:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7938/12318 [13:44:43<7:35:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7938/12318 [13:44:43<7:35:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7939/12318 [13:44:46<7:34:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7939/12318 [13:44:46<7:34:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7940/12318 [13:44:51<7:34:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7940/12318 [13:44:51<7:34:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7941/12318 [13:44:58<7:34:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7941/12318 [13:44:58<7:34:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7942/12318 [13:45:01<7:34:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7942/12318 [13:45:01<7:34:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7943/12318 [13:45:04<7:34:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7943/12318 [13:45:04<7:34:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7944/12318 [13:45:06<7:34:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7944/12318 [13:45:06<7:34:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  64%|▋| 7945/12318 [13:45:08<7:34:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  64%|▋| 7945/12318 [13:45:08<7:34:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7946/12318 [13:45:12<7:34:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7946/12318 [13:45:12<7:34:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7947/12318 [13:45:20<7:33:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7947/12318 [13:45:20<7:33:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7948/12318 [13:45:29<7:33:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7948/12318 [13:45:29<7:33:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7949/12318 [13:45:34<7:33:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7949/12318 [13:45:34<7:33:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7950/12318 [13:45:37<7:33:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7950/12318 [13:45:37<7:33:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7951/12318 [13:45:45<7:33:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7951/12318 [13:45:45<7:33:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7952/12318 [13:45:50<7:33:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7952/12318 [13:45:50<7:33:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7953/12318 [13:45:55<7:33:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7953/12318 [13:45:55<7:33:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7954/12318 [13:46:00<7:33:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7954/12318 [13:46:00<7:33:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7955/12318 [13:46:09<7:33:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7955/12318 [13:46:09<7:33:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7956/12318 [13:46:17<7:33:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7956/12318 [13:46:17<7:33:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7957/12318 [13:46:20<7:32:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7957/12318 [13:46:20<7:32:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7958/12318 [13:46:24<7:32:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7958/12318 [13:46:24<7:32:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7959/12318 [13:46:27<7:32:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7959/12318 [13:46:27<7:32:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7960/12318 [13:46:35<7:32:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7960/12318 [13:46:35<7:32:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7961/12318 [13:46:39<7:32:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7961/12318 [13:46:39<7:32:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7962/12318 [13:46:41<7:32:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7962/12318 [13:46:41<7:32:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7963/12318 [13:46:49<7:32:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7963/12318 [13:46:49<7:32:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7964/12318 [13:46:56<7:32:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7964/12318 [13:46:56<7:32:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7965/12318 [13:47:01<7:31:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7965/12318 [13:47:01<7:31:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7966/12318 [13:47:08<7:31:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7966/12318 [13:47:08<7:31:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7967/12318 [13:47:15<7:31:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7967/12318 [13:47:15<7:31:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7968/12318 [13:47:46<7:31:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7968/12318 [13:47:46<7:31:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7969/12318 [13:47:50<7:31:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7969/12318 [13:47:50<7:31:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7970/12318 [13:47:59<7:31:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7970/12318 [13:47:59<7:31:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7971/12318 [13:48:04<7:31:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7971/12318 [13:48:04<7:31:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7972/12318 [13:48:12<7:31:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7972/12318 [13:48:12<7:31:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7973/12318 [13:48:17<7:31:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7973/12318 [13:48:17<7:31:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7974/12318 [13:48:18<7:31:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7974/12318 [13:48:18<7:31:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7975/12318 [13:48:24<7:31:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7975/12318 [13:48:24<7:31:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7976/12318 [13:48:31<7:31:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7976/12318 [13:48:31<7:31:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7977/12318 [13:48:34<7:30:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7977/12318 [13:48:34<7:30:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7978/12318 [13:48:36<7:30:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7978/12318 [13:48:36<7:30:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7979/12318 [13:48:41<7:30:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7979/12318 [13:48:41<7:30:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7980/12318 [13:48:47<7:30:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7980/12318 [13:48:47<7:30:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7981/12318 [13:48:55<7:30:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7981/12318 [13:48:55<7:30:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7982/12318 [13:49:03<7:30:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7982/12318 [13:49:03<7:30:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7983/12318 [13:49:07<7:30:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7983/12318 [13:49:07<7:30:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7984/12318 [13:49:14<7:30:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7984/12318 [13:49:14<7:30:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7985/12318 [13:49:22<7:30:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7985/12318 [13:49:22<7:30:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7986/12318 [13:49:31<7:29:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7986/12318 [13:49:31<7:29:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7987/12318 [13:49:34<7:29:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7987/12318 [13:49:34<7:29:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7988/12318 [13:49:36<7:29:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7988/12318 [13:49:36<7:29:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7989/12318 [13:49:45<7:29:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7989/12318 [13:49:45<7:29:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7990/12318 [13:49:49<7:29:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7990/12318 [13:49:49<7:29:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7991/12318 [13:49:58<7:29:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7991/12318 [13:49:58<7:29:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7992/12318 [13:50:05<7:29:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7992/12318 [13:50:05<7:29:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7993/12318 [13:50:11<7:29:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7993/12318 [13:50:11<7:29:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7994/12318 [13:50:19<7:29:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7994/12318 [13:50:19<7:29:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7995/12318 [13:50:26<7:29:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7995/12318 [13:50:26<7:29:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7996/12318 [13:50:31<7:28:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7996/12318 [13:50:31<7:28:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7997/12318 [13:50:37<7:28:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7997/12318 [13:50:37<7:28:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7998/12318 [13:50:40<7:28:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7998/12318 [13:50:40<7:28:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 7999/12318 [13:50:45<7:28:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 7999/12318 [13:50:45<7:28:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8000/12318 [13:51:03<7:28:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8000/12318 [13:51:03<7:28:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8001/12318 [13:51:30<7:28:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8001/12318 [13:51:30<7:28:38,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8002/12318 [13:51:39<7:28:33,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8002/12318 [13:51:39<7:28:33,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8003/12318 [13:51:42<7:28:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8003/12318 [13:51:42<7:28:25,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8004/12318 [13:51:43<7:28:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8004/12318 [13:51:43<7:28:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8005/12318 [13:51:51<7:28:11,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8005/12318 [13:51:51<7:28:11,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8006/12318 [13:51:57<7:28:05,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8006/12318 [13:51:57<7:28:05,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8007/12318 [13:52:02<7:27:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8007/12318 [13:52:02<7:27:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8008/12318 [13:52:09<7:27:52,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8008/12318 [13:52:09<7:27:52,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8009/12318 [13:52:16<7:27:46,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8009/12318 [13:52:16<7:27:46,  6.24s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8010/12318 [13:52:22<7:27:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8010/12318 [13:52:22<7:27:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8011/12318 [13:52:25<7:27:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8011/12318 [13:52:25<7:27:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8012/12318 [13:52:26<7:27:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8012/12318 [13:52:26<7:27:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8013/12318 [13:52:33<7:27:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8013/12318 [13:52:33<7:27:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8014/12318 [13:52:34<7:27:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8014/12318 [13:52:34<7:27:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8015/12318 [13:52:37<7:27:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8015/12318 [13:52:37<7:27:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8016/12318 [13:52:39<7:26:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8016/12318 [13:52:39<7:26:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8017/12318 [13:52:48<7:26:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8017/12318 [13:52:48<7:26:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8018/12318 [13:52:54<7:26:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8018/12318 [13:52:54<7:26:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8019/12318 [13:52:59<7:26:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8019/12318 [13:52:59<7:26:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8020/12318 [13:53:06<7:26:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8020/12318 [13:53:06<7:26:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8021/12318 [13:53:11<7:26:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8021/12318 [13:53:11<7:26:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8022/12318 [13:53:17<7:26:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8022/12318 [13:53:17<7:26:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8023/12318 [13:53:21<7:26:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8023/12318 [13:53:21<7:26:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8024/12318 [13:53:25<7:26:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8024/12318 [13:53:25<7:26:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8025/12318 [13:53:33<7:25:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8025/12318 [13:53:33<7:25:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8026/12318 [13:53:37<7:25:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8026/12318 [13:53:37<7:25:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8027/12318 [13:53:41<7:25:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8027/12318 [13:53:41<7:25:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8028/12318 [13:53:45<7:25:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8028/12318 [13:53:45<7:25:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8029/12318 [13:53:52<7:25:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8029/12318 [13:53:52<7:25:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8030/12318 [13:54:00<7:25:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8030/12318 [13:54:00<7:25:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8031/12318 [13:54:07<7:25:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8031/12318 [13:54:07<7:25:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8032/12318 [13:54:38<7:25:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8032/12318 [13:54:38<7:25:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8033/12318 [13:54:42<7:25:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8033/12318 [13:54:42<7:25:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8034/12318 [13:54:49<7:25:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8034/12318 [13:54:49<7:25:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8035/12318 [13:54:52<7:25:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8035/12318 [13:54:52<7:25:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8036/12318 [13:54:54<7:24:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8036/12318 [13:54:54<7:24:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8037/12318 [13:54:58<7:24:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8037/12318 [13:54:58<7:24:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8038/12318 [13:55:02<7:24:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8038/12318 [13:55:02<7:24:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8039/12318 [13:55:09<7:24:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8039/12318 [13:55:09<7:24:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8040/12318 [13:55:16<7:24:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8040/12318 [13:55:16<7:24:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8041/12318 [13:55:19<7:24:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8041/12318 [13:55:19<7:24:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8042/12318 [13:55:22<7:24:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8042/12318 [13:55:22<7:24:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8043/12318 [13:55:30<7:24:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8043/12318 [13:55:30<7:24:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8044/12318 [13:55:37<7:23:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8044/12318 [13:55:37<7:23:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8045/12318 [13:55:43<7:23:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8045/12318 [13:55:43<7:23:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8046/12318 [13:55:49<7:23:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8046/12318 [13:55:49<7:23:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8047/12318 [13:55:56<7:23:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8047/12318 [13:55:56<7:23:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8048/12318 [13:56:02<7:23:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8048/12318 [13:56:02<7:23:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8049/12318 [13:56:09<7:23:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8049/12318 [13:56:09<7:23:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8050/12318 [13:56:15<7:23:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8050/12318 [13:56:15<7:23:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8051/12318 [13:56:17<7:23:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8051/12318 [13:56:17<7:23:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8052/12318 [13:56:26<7:23:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8052/12318 [13:56:26<7:23:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8053/12318 [13:56:34<7:23:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8053/12318 [13:56:34<7:23:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8054/12318 [13:56:43<7:22:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8054/12318 [13:56:43<7:22:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8055/12318 [13:56:46<7:22:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8055/12318 [13:56:46<7:22:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8056/12318 [13:56:53<7:22:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8056/12318 [13:56:53<7:22:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8057/12318 [13:56:58<7:22:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8057/12318 [13:56:58<7:22:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8058/12318 [13:57:01<7:22:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8058/12318 [13:57:01<7:22:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8059/12318 [13:57:04<7:22:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8059/12318 [13:57:04<7:22:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8060/12318 [13:57:08<7:22:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8060/12318 [13:57:08<7:22:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8061/12318 [13:57:09<7:22:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8061/12318 [13:57:09<7:22:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8062/12318 [13:57:12<7:21:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8062/12318 [13:57:12<7:21:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8063/12318 [13:57:13<7:21:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8063/12318 [13:57:13<7:21:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8064/12318 [13:57:58<7:22:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8064/12318 [13:57:58<7:22:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8065/12318 [13:58:01<7:21:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8065/12318 [13:58:01<7:21:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8066/12318 [13:58:05<7:21:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8066/12318 [13:58:05<7:21:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8067/12318 [13:58:13<7:21:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8067/12318 [13:58:13<7:21:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  65%|▋| 8068/12318 [13:58:21<7:21:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  65%|▋| 8068/12318 [13:58:21<7:21:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8069/12318 [13:58:24<7:21:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8069/12318 [13:58:24<7:21:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8070/12318 [13:58:28<7:21:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8070/12318 [13:58:28<7:21:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8071/12318 [13:58:32<7:21:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8071/12318 [13:58:32<7:21:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8072/12318 [13:58:38<7:21:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8072/12318 [13:58:38<7:21:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8073/12318 [13:58:46<7:21:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8073/12318 [13:58:46<7:21:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8074/12318 [13:58:51<7:20:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8074/12318 [13:58:51<7:20:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8075/12318 [13:58:56<7:20:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8075/12318 [13:58:56<7:20:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8076/12318 [13:59:04<7:20:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8076/12318 [13:59:04<7:20:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8077/12318 [13:59:12<7:20:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8077/12318 [13:59:12<7:20:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8078/12318 [13:59:17<7:20:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8078/12318 [13:59:17<7:20:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8079/12318 [13:59:23<7:20:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8079/12318 [13:59:23<7:20:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8080/12318 [13:59:31<7:20:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8080/12318 [13:59:31<7:20:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8081/12318 [13:59:40<7:20:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8081/12318 [13:59:40<7:20:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8082/12318 [13:59:45<7:20:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8082/12318 [13:59:45<7:20:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8083/12318 [13:59:52<7:20:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8083/12318 [13:59:52<7:20:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8084/12318 [13:59:55<7:19:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8084/12318 [13:59:55<7:19:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8085/12318 [13:59:58<7:19:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8085/12318 [13:59:58<7:19:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8086/12318 [14:00:01<7:19:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8086/12318 [14:00:01<7:19:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8087/12318 [14:00:09<7:19:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8087/12318 [14:00:09<7:19:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8088/12318 [14:00:16<7:19:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8088/12318 [14:00:16<7:19:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8089/12318 [14:00:19<7:19:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8089/12318 [14:00:19<7:19:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8090/12318 [14:00:23<7:19:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8090/12318 [14:00:23<7:19:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8091/12318 [14:00:30<7:19:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8091/12318 [14:00:30<7:19:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8092/12318 [14:00:35<7:18:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8092/12318 [14:00:35<7:18:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8093/12318 [14:00:42<7:18:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8093/12318 [14:00:42<7:18:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8094/12318 [14:00:48<7:18:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8094/12318 [14:00:48<7:18:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8095/12318 [14:00:53<7:18:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8095/12318 [14:00:53<7:18:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8096/12318 [14:01:10<7:18:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8096/12318 [14:01:10<7:18:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8097/12318 [14:01:12<7:18:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8097/12318 [14:01:12<7:18:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8098/12318 [14:01:20<7:18:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8098/12318 [14:01:20<7:18:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8099/12318 [14:01:24<7:18:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8099/12318 [14:01:24<7:18:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8100/12318 [14:01:32<7:18:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8100/12318 [14:01:32<7:18:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8101/12318 [14:01:40<7:18:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8101/12318 [14:01:40<7:18:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8102/12318 [14:01:47<7:18:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8102/12318 [14:01:47<7:18:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8103/12318 [14:01:53<7:17:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8103/12318 [14:01:53<7:17:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8104/12318 [14:01:56<7:17:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8104/12318 [14:01:56<7:17:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8105/12318 [14:02:01<7:17:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8105/12318 [14:02:01<7:17:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8106/12318 [14:02:07<7:17:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8106/12318 [14:02:07<7:17:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8107/12318 [14:02:12<7:17:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8107/12318 [14:02:12<7:17:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8108/12318 [14:02:14<7:17:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8108/12318 [14:02:14<7:17:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8109/12318 [14:02:19<7:17:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8109/12318 [14:02:19<7:17:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8110/12318 [14:02:27<7:17:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8110/12318 [14:02:27<7:17:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8111/12318 [14:02:33<7:17:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8111/12318 [14:02:33<7:17:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8112/12318 [14:02:36<7:16:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8112/12318 [14:02:36<7:16:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8113/12318 [14:02:42<7:16:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8113/12318 [14:02:42<7:16:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8114/12318 [14:02:47<7:16:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8114/12318 [14:02:47<7:16:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8115/12318 [14:02:53<7:16:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8115/12318 [14:02:53<7:16:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8116/12318 [14:03:02<7:16:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8116/12318 [14:03:02<7:16:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8117/12318 [14:03:08<7:16:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8117/12318 [14:03:08<7:16:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8118/12318 [14:03:15<7:16:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8118/12318 [14:03:15<7:16:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8119/12318 [14:03:23<7:16:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8119/12318 [14:03:23<7:16:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8120/12318 [14:03:28<7:16:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8120/12318 [14:03:28<7:16:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8121/12318 [14:03:35<7:15:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8121/12318 [14:03:35<7:15:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8122/12318 [14:03:40<7:15:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8122/12318 [14:03:40<7:15:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8123/12318 [14:03:49<7:15:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8123/12318 [14:03:49<7:15:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8124/12318 [14:03:56<7:15:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8124/12318 [14:03:56<7:15:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8125/12318 [14:04:00<7:15:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8125/12318 [14:04:00<7:15:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8126/12318 [14:04:06<7:15:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8126/12318 [14:04:06<7:15:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8127/12318 [14:04:10<7:15:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8127/12318 [14:04:10<7:15:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8128/12318 [14:04:26<7:15:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8128/12318 [14:04:26<7:15:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8129/12318 [14:04:33<7:15:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8129/12318 [14:04:33<7:15:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8130/12318 [14:04:38<7:15:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8130/12318 [14:04:38<7:15:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8131/12318 [14:04:43<7:14:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8131/12318 [14:04:43<7:14:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8132/12318 [14:04:45<7:14:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8132/12318 [14:04:45<7:14:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8133/12318 [14:04:51<7:14:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8133/12318 [14:04:51<7:14:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8134/12318 [14:05:00<7:14:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8134/12318 [14:05:00<7:14:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8135/12318 [14:05:06<7:14:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8135/12318 [14:05:06<7:14:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8136/12318 [14:05:12<7:14:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8136/12318 [14:05:12<7:14:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8137/12318 [14:05:19<7:14:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8137/12318 [14:05:19<7:14:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8138/12318 [14:05:28<7:14:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8138/12318 [14:05:28<7:14:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8139/12318 [14:05:37<7:14:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8139/12318 [14:05:37<7:14:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8140/12318 [14:05:39<7:14:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8140/12318 [14:05:39<7:14:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8141/12318 [14:05:48<7:13:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8141/12318 [14:05:48<7:13:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8142/12318 [14:05:49<7:13:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8142/12318 [14:05:49<7:13:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8143/12318 [14:05:55<7:13:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8143/12318 [14:05:55<7:13:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8144/12318 [14:05:56<7:13:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8144/12318 [14:05:56<7:13:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8145/12318 [14:06:04<7:13:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8145/12318 [14:06:04<7:13:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8146/12318 [14:06:08<7:13:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8146/12318 [14:06:08<7:13:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8147/12318 [14:06:09<7:13:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8147/12318 [14:06:09<7:13:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8148/12318 [14:06:10<7:13:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8148/12318 [14:06:10<7:13:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8149/12318 [14:06:12<7:12:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8149/12318 [14:06:12<7:12:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8150/12318 [14:06:18<7:12:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8150/12318 [14:06:18<7:12:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8151/12318 [14:06:23<7:12:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8151/12318 [14:06:23<7:12:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8152/12318 [14:06:32<7:12:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8152/12318 [14:06:32<7:12:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8153/12318 [14:06:33<7:12:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8153/12318 [14:06:33<7:12:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8154/12318 [14:06:41<7:12:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8154/12318 [14:06:41<7:12:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8155/12318 [14:06:42<7:12:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8155/12318 [14:06:42<7:12:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8156/12318 [14:06:44<7:12:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8156/12318 [14:06:44<7:12:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8157/12318 [14:06:51<7:11:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8157/12318 [14:06:51<7:11:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8158/12318 [14:06:56<7:11:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8158/12318 [14:06:56<7:11:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8159/12318 [14:07:04<7:11:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8159/12318 [14:07:04<7:11:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8160/12318 [14:07:43<7:11:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8160/12318 [14:07:43<7:11:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8161/12318 [14:07:50<7:11:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8161/12318 [14:07:50<7:11:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8162/12318 [14:07:52<7:11:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8162/12318 [14:07:52<7:11:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8163/12318 [14:07:58<7:11:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8163/12318 [14:07:58<7:11:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8164/12318 [14:08:04<7:11:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8164/12318 [14:08:04<7:11:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8165/12318 [14:08:12<7:11:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8165/12318 [14:08:12<7:11:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8166/12318 [14:08:19<7:11:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8166/12318 [14:08:19<7:11:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8167/12318 [14:08:22<7:11:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8167/12318 [14:08:22<7:11:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8168/12318 [14:08:24<7:11:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8168/12318 [14:08:24<7:11:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8169/12318 [14:08:25<7:10:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8169/12318 [14:08:25<7:10:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8170/12318 [14:08:30<7:10:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8170/12318 [14:08:30<7:10:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8171/12318 [14:08:38<7:10:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8171/12318 [14:08:38<7:10:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8172/12318 [14:08:47<7:10:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8172/12318 [14:08:47<7:10:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8173/12318 [14:08:50<7:10:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8173/12318 [14:08:50<7:10:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8174/12318 [14:08:52<7:10:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8174/12318 [14:08:52<7:10:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8175/12318 [14:08:56<7:10:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8175/12318 [14:08:56<7:10:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8176/12318 [14:09:01<7:10:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8176/12318 [14:09:01<7:10:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8177/12318 [14:09:06<7:10:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8177/12318 [14:09:06<7:10:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8178/12318 [14:09:07<7:09:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8178/12318 [14:09:07<7:09:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8179/12318 [14:09:13<7:09:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8179/12318 [14:09:13<7:09:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8180/12318 [14:09:20<7:09:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8180/12318 [14:09:20<7:09:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8181/12318 [14:09:25<7:09:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8181/12318 [14:09:25<7:09:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8182/12318 [14:09:32<7:09:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8182/12318 [14:09:32<7:09:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8183/12318 [14:09:38<7:09:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8183/12318 [14:09:38<7:09:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8184/12318 [14:09:44<7:09:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8184/12318 [14:09:44<7:09:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8185/12318 [14:09:47<7:09:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8185/12318 [14:09:47<7:09:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8186/12318 [14:09:56<7:09:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8186/12318 [14:09:56<7:09:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8187/12318 [14:10:00<7:08:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8187/12318 [14:10:00<7:08:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8188/12318 [14:10:01<7:08:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8188/12318 [14:10:01<7:08:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8189/12318 [14:10:06<7:08:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8189/12318 [14:10:06<7:08:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8190/12318 [14:10:11<7:08:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8190/12318 [14:10:11<7:08:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  66%|▋| 8191/12318 [14:10:17<7:08:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  66%|▋| 8191/12318 [14:10:17<7:08:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8192/12318 [14:11:01<7:08:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8192/12318 [14:11:01<7:08:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8193/12318 [14:11:06<7:08:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8193/12318 [14:11:06<7:08:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8194/12318 [14:11:11<7:08:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8194/12318 [14:11:11<7:08:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8195/12318 [14:11:13<7:08:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8195/12318 [14:11:13<7:08:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8196/12318 [14:11:16<7:08:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8196/12318 [14:11:16<7:08:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8197/12318 [14:11:25<7:08:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8197/12318 [14:11:25<7:08:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8198/12318 [14:11:31<7:07:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8198/12318 [14:11:31<7:07:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8199/12318 [14:11:34<7:07:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8199/12318 [14:11:34<7:07:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8200/12318 [14:11:38<7:07:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8200/12318 [14:11:38<7:07:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8201/12318 [14:11:41<7:07:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8201/12318 [14:11:41<7:07:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8202/12318 [14:11:45<7:07:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8202/12318 [14:11:45<7:07:26,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8203/12318 [14:11:49<7:07:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8203/12318 [14:11:49<7:07:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8204/12318 [14:11:58<7:07:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8204/12318 [14:11:58<7:07:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8205/12318 [14:12:05<7:07:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8205/12318 [14:12:05<7:07:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8206/12318 [14:12:10<7:07:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8206/12318 [14:12:10<7:07:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8207/12318 [14:12:16<7:06:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8207/12318 [14:12:16<7:06:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8208/12318 [14:12:17<7:06:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8208/12318 [14:12:17<7:06:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8209/12318 [14:12:23<7:06:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8209/12318 [14:12:23<7:06:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8210/12318 [14:12:24<7:06:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8210/12318 [14:12:24<7:06:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8211/12318 [14:12:28<7:06:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8211/12318 [14:12:28<7:06:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8212/12318 [14:12:30<7:06:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8212/12318 [14:12:30<7:06:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8213/12318 [14:12:35<7:06:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8213/12318 [14:12:35<7:06:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8214/12318 [14:12:36<7:05:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8214/12318 [14:12:36<7:05:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8215/12318 [14:12:40<7:05:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8215/12318 [14:12:40<7:05:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8216/12318 [14:12:43<7:05:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8216/12318 [14:12:43<7:05:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8217/12318 [14:12:51<7:05:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8217/12318 [14:12:51<7:05:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8218/12318 [14:12:57<7:05:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8218/12318 [14:12:57<7:05:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8219/12318 [14:13:02<7:05:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8219/12318 [14:13:02<7:05:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8220/12318 [14:13:08<7:05:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8220/12318 [14:13:08<7:05:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8221/12318 [14:13:17<7:05:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8221/12318 [14:13:17<7:05:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8222/12318 [14:13:25<7:05:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8222/12318 [14:13:25<7:05:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8223/12318 [14:13:28<7:05:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8223/12318 [14:13:28<7:05:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8224/12318 [14:14:15<7:05:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8224/12318 [14:14:15<7:05:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8225/12318 [14:14:18<7:05:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8225/12318 [14:14:18<7:05:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8226/12318 [14:14:24<7:05:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8226/12318 [14:14:24<7:05:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8227/12318 [14:14:31<7:04:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8227/12318 [14:14:31<7:04:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8228/12318 [14:14:38<7:04:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8228/12318 [14:14:38<7:04:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8229/12318 [14:14:47<7:04:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8229/12318 [14:14:47<7:04:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8230/12318 [14:14:54<7:04:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8230/12318 [14:14:54<7:04:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8231/12318 [14:15:03<7:04:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8231/12318 [14:15:03<7:04:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8232/12318 [14:15:05<7:04:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8232/12318 [14:15:05<7:04:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8233/12318 [14:15:10<7:04:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8233/12318 [14:15:10<7:04:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8234/12318 [14:15:18<7:04:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8234/12318 [14:15:18<7:04:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8235/12318 [14:15:21<7:04:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8235/12318 [14:15:21<7:04:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8236/12318 [14:15:26<7:03:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8236/12318 [14:15:26<7:03:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8237/12318 [14:15:32<7:03:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8237/12318 [14:15:32<7:03:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8238/12318 [14:15:41<7:03:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8238/12318 [14:15:41<7:03:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8239/12318 [14:15:49<7:03:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8239/12318 [14:15:49<7:03:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8240/12318 [14:15:58<7:03:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8240/12318 [14:15:58<7:03:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8241/12318 [14:16:03<7:03:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8241/12318 [14:16:03<7:03:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8242/12318 [14:16:10<7:03:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8242/12318 [14:16:10<7:03:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8243/12318 [14:16:17<7:03:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8243/12318 [14:16:17<7:03:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8244/12318 [14:16:25<7:03:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8244/12318 [14:16:25<7:03:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8245/12318 [14:16:32<7:03:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8245/12318 [14:16:32<7:03:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8246/12318 [14:16:34<7:02:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8246/12318 [14:16:34<7:02:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8247/12318 [14:16:39<7:02:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8247/12318 [14:16:39<7:02:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8248/12318 [14:16:42<7:02:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8248/12318 [14:16:42<7:02:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8249/12318 [14:16:48<7:02:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8249/12318 [14:16:48<7:02:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8250/12318 [14:16:51<7:02:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8250/12318 [14:16:51<7:02:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8251/12318 [14:16:56<7:02:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8251/12318 [14:16:56<7:02:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8252/12318 [14:17:04<7:02:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8252/12318 [14:17:04<7:02:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8253/12318 [14:17:06<7:02:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8253/12318 [14:17:06<7:02:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8254/12318 [14:17:13<7:02:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8254/12318 [14:17:13<7:02:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8255/12318 [14:17:18<7:01:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8255/12318 [14:17:18<7:01:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8256/12318 [14:17:35<7:01:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8256/12318 [14:17:35<7:01:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8257/12318 [14:17:43<7:01:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8257/12318 [14:17:43<7:01:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8258/12318 [14:17:47<7:01:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8258/12318 [14:17:47<7:01:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8259/12318 [14:17:49<7:01:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8259/12318 [14:17:49<7:01:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8260/12318 [14:17:57<7:01:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8260/12318 [14:17:57<7:01:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8261/12318 [14:18:05<7:01:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8261/12318 [14:18:05<7:01:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8262/12318 [14:18:07<7:01:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8262/12318 [14:18:07<7:01:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8263/12318 [14:18:15<7:01:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8263/12318 [14:18:15<7:01:11,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8264/12318 [14:18:23<7:01:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8264/12318 [14:18:23<7:01:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8265/12318 [14:18:30<7:00:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8265/12318 [14:18:30<7:00:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8266/12318 [14:18:37<7:00:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8266/12318 [14:18:37<7:00:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8267/12318 [14:18:44<7:00:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8267/12318 [14:18:44<7:00:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8268/12318 [14:18:52<7:00:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8268/12318 [14:18:52<7:00:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8269/12318 [14:18:55<7:00:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8269/12318 [14:18:55<7:00:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8270/12318 [14:18:59<7:00:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8270/12318 [14:18:59<7:00:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8271/12318 [14:19:04<7:00:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8271/12318 [14:19:04<7:00:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8272/12318 [14:19:13<7:00:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8272/12318 [14:19:13<7:00:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8273/12318 [14:19:20<7:00:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8273/12318 [14:19:20<7:00:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8274/12318 [14:19:23<7:00:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8274/12318 [14:19:23<7:00:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8275/12318 [14:19:27<6:59:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8275/12318 [14:19:27<6:59:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8276/12318 [14:19:36<6:59:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8276/12318 [14:19:36<6:59:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8277/12318 [14:19:42<6:59:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8277/12318 [14:19:42<6:59:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8278/12318 [14:19:48<6:59:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8278/12318 [14:19:48<6:59:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8279/12318 [14:19:52<6:59:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8279/12318 [14:19:52<6:59:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8280/12318 [14:19:58<6:59:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8280/12318 [14:19:58<6:59:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8281/12318 [14:20:07<6:59:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8281/12318 [14:20:07<6:59:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8282/12318 [14:20:15<6:59:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8282/12318 [14:20:15<6:59:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8283/12318 [14:20:22<6:59:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8283/12318 [14:20:22<6:59:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8284/12318 [14:20:29<6:59:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8284/12318 [14:20:29<6:59:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8285/12318 [14:20:35<6:58:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8285/12318 [14:20:35<6:58:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8286/12318 [14:20:40<6:58:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8286/12318 [14:20:40<6:58:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8287/12318 [14:20:46<6:58:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8287/12318 [14:20:46<6:58:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8288/12318 [14:21:08<6:58:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8288/12318 [14:21:08<6:58:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8289/12318 [14:21:14<6:58:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8289/12318 [14:21:14<6:58:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8290/12318 [14:21:22<6:58:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8290/12318 [14:21:22<6:58:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8291/12318 [14:21:23<6:58:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8291/12318 [14:21:23<6:58:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8292/12318 [14:21:29<6:58:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8292/12318 [14:21:29<6:58:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8293/12318 [14:21:34<6:58:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8293/12318 [14:21:34<6:58:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8294/12318 [14:21:37<6:58:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8294/12318 [14:21:37<6:58:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8295/12318 [14:21:42<6:57:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8295/12318 [14:21:42<6:57:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8296/12318 [14:21:50<6:57:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8296/12318 [14:21:50<6:57:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8297/12318 [14:21:55<6:57:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8297/12318 [14:21:55<6:57:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8298/12318 [14:22:01<6:57:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8298/12318 [14:22:01<6:57:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8299/12318 [14:22:08<6:57:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8299/12318 [14:22:08<6:57:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8300/12318 [14:22:13<6:57:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8300/12318 [14:22:13<6:57:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8301/12318 [14:22:18<6:57:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8301/12318 [14:22:18<6:57:17,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8302/12318 [14:22:23<6:57:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8302/12318 [14:22:23<6:57:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8303/12318 [14:22:32<6:57:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8303/12318 [14:22:32<6:57:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8304/12318 [14:22:37<6:56:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8304/12318 [14:22:37<6:56:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8305/12318 [14:22:41<6:56:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8305/12318 [14:22:41<6:56:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8306/12318 [14:22:43<6:56:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8306/12318 [14:22:43<6:56:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8307/12318 [14:22:46<6:56:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8307/12318 [14:22:46<6:56:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8308/12318 [14:22:53<6:56:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8308/12318 [14:22:53<6:56:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8309/12318 [14:22:57<6:56:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8309/12318 [14:22:57<6:56:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8310/12318 [14:23:03<6:56:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8310/12318 [14:23:03<6:56:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8311/12318 [14:23:11<6:56:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8311/12318 [14:23:11<6:56:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8312/12318 [14:23:15<6:56:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8312/12318 [14:23:15<6:56:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8313/12318 [14:23:16<6:55:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8313/12318 [14:23:16<6:55:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  67%|▋| 8314/12318 [14:23:24<6:55:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  67%|▋| 8314/12318 [14:23:24<6:55:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8315/12318 [14:23:32<6:55:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8315/12318 [14:23:32<6:55:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8316/12318 [14:23:38<6:55:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8316/12318 [14:23:38<6:55:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8317/12318 [14:23:43<6:55:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8317/12318 [14:23:43<6:55:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8318/12318 [14:23:52<6:55:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8318/12318 [14:23:52<6:55:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8319/12318 [14:23:58<6:55:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8319/12318 [14:23:58<6:55:19,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8320/12318 [14:24:22<6:55:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8320/12318 [14:24:22<6:55:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8321/12318 [14:24:28<6:55:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8321/12318 [14:24:28<6:55:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8322/12318 [14:24:32<6:55:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8322/12318 [14:24:32<6:55:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8323/12318 [14:24:36<6:55:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8323/12318 [14:24:36<6:55:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8324/12318 [14:24:37<6:54:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8324/12318 [14:24:37<6:54:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8325/12318 [14:24:38<6:54:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8325/12318 [14:24:38<6:54:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8326/12318 [14:24:45<6:54:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8326/12318 [14:24:45<6:54:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8327/12318 [14:24:50<6:54:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8327/12318 [14:24:50<6:54:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8328/12318 [14:24:55<6:54:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8328/12318 [14:24:55<6:54:23,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8329/12318 [14:25:01<6:54:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8329/12318 [14:25:01<6:54:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8330/12318 [14:25:06<6:54:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8330/12318 [14:25:06<6:54:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8331/12318 [14:25:14<6:54:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8331/12318 [14:25:14<6:54:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8332/12318 [14:25:21<6:53:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8332/12318 [14:25:21<6:53:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8333/12318 [14:25:26<6:53:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8333/12318 [14:25:26<6:53:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8334/12318 [14:25:31<6:53:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8334/12318 [14:25:31<6:53:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8335/12318 [14:25:32<6:53:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8335/12318 [14:25:32<6:53:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8336/12318 [14:25:36<6:53:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8336/12318 [14:25:36<6:53:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8337/12318 [14:25:41<6:53:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8337/12318 [14:25:41<6:53:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8338/12318 [14:25:45<6:53:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8338/12318 [14:25:45<6:53:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8339/12318 [14:25:54<6:53:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8339/12318 [14:25:54<6:53:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8340/12318 [14:25:59<6:53:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8340/12318 [14:25:59<6:53:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8341/12318 [14:26:06<6:52:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8341/12318 [14:26:06<6:52:57,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8342/12318 [14:26:11<6:52:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8342/12318 [14:26:11<6:52:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8343/12318 [14:26:14<6:52:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8343/12318 [14:26:14<6:52:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8344/12318 [14:26:22<6:52:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8344/12318 [14:26:22<6:52:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8345/12318 [14:26:28<6:52:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8345/12318 [14:26:28<6:52:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8346/12318 [14:26:33<6:52:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8346/12318 [14:26:33<6:52:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8347/12318 [14:26:36<6:52:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8347/12318 [14:26:36<6:52:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8348/12318 [14:26:41<6:52:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8348/12318 [14:26:41<6:52:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8349/12318 [14:26:42<6:52:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8349/12318 [14:26:42<6:52:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8350/12318 [14:26:47<6:51:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8350/12318 [14:26:47<6:51:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8351/12318 [14:26:54<6:51:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8351/12318 [14:26:54<6:51:48,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8352/12318 [14:27:36<6:51:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8352/12318 [14:27:36<6:51:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8353/12318 [14:27:40<6:51:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8353/12318 [14:27:40<6:51:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8354/12318 [14:27:47<6:51:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8354/12318 [14:27:47<6:51:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8355/12318 [14:27:51<6:51:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8355/12318 [14:27:51<6:51:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8356/12318 [14:27:54<6:51:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8356/12318 [14:27:54<6:51:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8357/12318 [14:28:02<6:51:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8357/12318 [14:28:02<6:51:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8358/12318 [14:28:07<6:51:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8358/12318 [14:28:07<6:51:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8359/12318 [14:28:15<6:51:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8359/12318 [14:28:15<6:51:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8360/12318 [14:28:18<6:51:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8360/12318 [14:28:18<6:51:05,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8361/12318 [14:28:21<6:50:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8361/12318 [14:28:21<6:50:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8362/12318 [14:28:29<6:50:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8362/12318 [14:28:29<6:50:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8363/12318 [14:28:34<6:50:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8363/12318 [14:28:34<6:50:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8364/12318 [14:28:37<6:50:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8364/12318 [14:28:37<6:50:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8365/12318 [14:28:40<6:50:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8365/12318 [14:28:40<6:50:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8366/12318 [14:28:42<6:50:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8366/12318 [14:28:42<6:50:21,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8367/12318 [14:28:46<6:50:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8367/12318 [14:28:46<6:50:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8368/12318 [14:28:49<6:50:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8368/12318 [14:28:49<6:50:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8369/12318 [14:28:54<6:50:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8369/12318 [14:28:54<6:50:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8370/12318 [14:28:58<6:49:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8370/12318 [14:28:58<6:49:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8371/12318 [14:29:06<6:49:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8371/12318 [14:29:06<6:49:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8372/12318 [14:29:10<6:49:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8372/12318 [14:29:10<6:49:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8373/12318 [14:29:17<6:49:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8373/12318 [14:29:17<6:49:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8374/12318 [14:29:21<6:49:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8374/12318 [14:29:21<6:49:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8375/12318 [14:29:30<6:49:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8375/12318 [14:29:30<6:49:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8376/12318 [14:29:33<6:49:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8376/12318 [14:29:33<6:49:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8377/12318 [14:29:38<6:49:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8377/12318 [14:29:38<6:49:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8378/12318 [14:29:40<6:48:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8378/12318 [14:29:40<6:48:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8379/12318 [14:29:44<6:48:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8379/12318 [14:29:44<6:48:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8380/12318 [14:29:52<6:48:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8380/12318 [14:29:52<6:48:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8381/12318 [14:30:01<6:48:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8381/12318 [14:30:01<6:48:41,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8382/12318 [14:30:06<6:48:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8382/12318 [14:30:06<6:48:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8383/12318 [14:30:11<6:48:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8383/12318 [14:30:11<6:48:28,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8384/12318 [14:30:47<6:48:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8384/12318 [14:30:47<6:48:35,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8385/12318 [14:30:52<6:48:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8385/12318 [14:30:52<6:48:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8386/12318 [14:30:53<6:48:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8386/12318 [14:30:53<6:48:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8387/12318 [14:30:58<6:48:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8387/12318 [14:30:58<6:48:13,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8388/12318 [14:31:03<6:48:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8388/12318 [14:31:03<6:48:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8389/12318 [14:31:10<6:48:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8389/12318 [14:31:10<6:48:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8390/12318 [14:31:19<6:47:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8390/12318 [14:31:19<6:47:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8391/12318 [14:31:24<6:47:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8391/12318 [14:31:24<6:47:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8392/12318 [14:31:30<6:47:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8392/12318 [14:31:30<6:47:42,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8393/12318 [14:31:37<6:47:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8393/12318 [14:31:37<6:47:37,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8394/12318 [14:31:43<6:47:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8394/12318 [14:31:43<6:47:30,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8395/12318 [14:31:49<6:47:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8395/12318 [14:31:49<6:47:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8396/12318 [14:31:56<6:47:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8396/12318 [14:31:56<6:47:18,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8397/12318 [14:31:59<6:47:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8397/12318 [14:31:59<6:47:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8398/12318 [14:32:03<6:47:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8398/12318 [14:32:03<6:47:03,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8399/12318 [14:32:12<6:46:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8399/12318 [14:32:12<6:46:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8400/12318 [14:32:16<6:46:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8400/12318 [14:32:16<6:46:51,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8401/12318 [14:32:21<6:46:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8401/12318 [14:32:21<6:46:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8402/12318 [14:32:27<6:46:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8402/12318 [14:32:27<6:46:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8403/12318 [14:32:32<6:46:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8403/12318 [14:32:32<6:46:31,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8404/12318 [14:32:37<6:46:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8404/12318 [14:32:37<6:46:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8405/12318 [14:32:39<6:46:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8405/12318 [14:32:39<6:46:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8406/12318 [14:32:41<6:46:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8406/12318 [14:32:41<6:46:08,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8407/12318 [14:32:47<6:46:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8407/12318 [14:32:47<6:46:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8408/12318 [14:32:50<6:45:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8408/12318 [14:32:50<6:45:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8409/12318 [14:32:55<6:45:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8409/12318 [14:32:55<6:45:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8410/12318 [14:33:01<6:45:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8410/12318 [14:33:01<6:45:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8411/12318 [14:33:03<6:45:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8411/12318 [14:33:03<6:45:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8412/12318 [14:33:07<6:45:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8412/12318 [14:33:07<6:45:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8413/12318 [14:33:08<6:45:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8413/12318 [14:33:08<6:45:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8414/12318 [14:33:13<6:45:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8414/12318 [14:33:13<6:45:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8415/12318 [14:33:17<6:45:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8415/12318 [14:33:17<6:45:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8416/12318 [14:34:03<6:45:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8416/12318 [14:34:03<6:45:15,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8417/12318 [14:34:07<6:45:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8417/12318 [14:34:07<6:45:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8418/12318 [14:34:12<6:45:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8418/12318 [14:34:12<6:45:00,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8419/12318 [14:34:15<6:44:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8419/12318 [14:34:15<6:44:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8420/12318 [14:34:22<6:44:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8420/12318 [14:34:22<6:44:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8421/12318 [14:34:28<6:44:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8421/12318 [14:34:28<6:44:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8422/12318 [14:34:34<6:44:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8422/12318 [14:34:34<6:44:34,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8423/12318 [14:34:38<6:44:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8423/12318 [14:34:38<6:44:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8424/12318 [14:34:42<6:44:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8424/12318 [14:34:42<6:44:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8425/12318 [14:34:45<6:44:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8425/12318 [14:34:45<6:44:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8426/12318 [14:34:52<6:44:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8426/12318 [14:34:52<6:44:06,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8427/12318 [14:34:55<6:43:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8427/12318 [14:34:55<6:43:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8428/12318 [14:35:03<6:43:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8428/12318 [14:35:03<6:43:53,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8429/12318 [14:35:08<6:43:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8429/12318 [14:35:08<6:43:46,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8430/12318 [14:35:10<6:43:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8430/12318 [14:35:10<6:43:38,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8431/12318 [14:35:11<6:43:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8431/12318 [14:35:11<6:43:29,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8432/12318 [14:35:15<6:43:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8432/12318 [14:35:15<6:43:22,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8433/12318 [14:35:17<6:43:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8433/12318 [14:35:17<6:43:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8434/12318 [14:35:22<6:43:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8434/12318 [14:35:22<6:43:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8435/12318 [14:35:24<6:42:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8435/12318 [14:35:24<6:42:59,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8436/12318 [14:35:33<6:42:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8436/12318 [14:35:33<6:42:54,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  68%|▋| 8437/12318 [14:35:37<6:42:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  68%|▋| 8437/12318 [14:35:37<6:42:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8438/12318 [14:35:41<6:42:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8438/12318 [14:35:41<6:42:40,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8439/12318 [14:35:44<6:42:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8439/12318 [14:35:44<6:42:32,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8440/12318 [14:35:46<6:42:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8440/12318 [14:35:46<6:42:24,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8441/12318 [14:35:49<6:42:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8441/12318 [14:35:49<6:42:16,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8442/12318 [14:35:56<6:42:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8442/12318 [14:35:56<6:42:10,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8443/12318 [14:36:02<6:42:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8443/12318 [14:36:02<6:42:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8444/12318 [14:36:10<6:41:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8444/12318 [14:36:10<6:41:58,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8445/12318 [14:36:15<6:41:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8445/12318 [14:36:15<6:41:52,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8446/12318 [14:36:21<6:41:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8446/12318 [14:36:21<6:41:45,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8447/12318 [14:36:29<6:41:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8447/12318 [14:36:29<6:41:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8448/12318 [14:37:41<6:42:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8448/12318 [14:37:41<6:42:04,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8449/12318 [14:37:43<6:41:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8449/12318 [14:37:43<6:41:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8450/12318 [14:37:49<6:41:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8450/12318 [14:37:49<6:41:49,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8451/12318 [14:37:58<6:41:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8451/12318 [14:37:58<6:41:44,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8452/12318 [14:38:00<6:41:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8452/12318 [14:38:00<6:41:36,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8453/12318 [14:38:01<6:41:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8453/12318 [14:38:01<6:41:27,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8454/12318 [14:38:05<6:41:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8454/12318 [14:38:05<6:41:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8455/12318 [14:38:11<6:41:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8455/12318 [14:38:11<6:41:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8456/12318 [14:38:16<6:41:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8456/12318 [14:38:16<6:41:07,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8457/12318 [14:38:24<6:41:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8457/12318 [14:38:24<6:41:01,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8458/12318 [14:38:29<6:40:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8458/12318 [14:38:29<6:40:55,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8459/12318 [14:38:33<6:40:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8459/12318 [14:38:33<6:40:47,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8460/12318 [14:38:35<6:40:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8460/12318 [14:38:35<6:40:39,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8461/12318 [14:38:41<6:40:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8461/12318 [14:38:41<6:40:33,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8462/12318 [14:38:43<6:40:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8462/12318 [14:38:43<6:40:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8463/12318 [14:38:52<6:40:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8463/12318 [14:38:52<6:40:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8464/12318 [14:38:54<6:40:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8464/12318 [14:38:54<6:40:12,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  69%|▋| 8465/12318 [14:39:01<6:40:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8465/12318 [14:39:01<6:40:06,  6.23s/it, v_num=e4xv, train/loss"
+      "        v5-L96-D102  63%[===========>        ]   1.67G  40.7MB/s    eta 25s    "
      ]
     },
     {
@@ -101916,8 +1912,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8466/12318 [14:39:05<6:39:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8466/12318 [14:39:05<6:39:58,  6.23s/it, v_num=e4xv, train/loss"
+      "       v5-L96-D1024  63%[===========>        ]   1.67G  40.2MB/s    eta 25s    "
      ]
     },
     {
@@ -101925,8 +1920,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8467/12318 [14:39:11<6:39:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8467/12318 [14:39:11<6:39:52,  6.23s/it, v_num=e4xv, train/loss"
+      "      v5-L96-D1024-  63%[===========>        ]   1.68G  40.6MB/s    eta 25s    "
      ]
     },
     {
@@ -101934,7 +1928,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8468/12318 [14:39:18<6:39:46,  6.23s/it, v_num=e4xv, train/loss"
+      "     v5-L96-D1024-E  64%[===========>        ]   1.69G  40.3MB/s    eta 25s    "
      ]
     },
     {
@@ -101942,7 +1936,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8468/12318 [14:39:18<6:39:46,  6.23s/it, v_num=e4xv, train/loss"
+      "    v5-L96-D1024-E0  64%[===========>        ]   1.70G  40.5MB/s    eta 24s    "
      ]
     },
     {
@@ -101950,8 +1944,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8469/12318 [14:39:23<6:39:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8469/12318 [14:39:23<6:39:40,  6.23s/it, v_num=e4xv, train/loss"
+      "   v5-L96-D1024-E0_  64%[===========>        ]   1.71G  40.5MB/s    eta 24s    "
      ]
     },
     {
@@ -101959,8 +1952,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8470/12318 [14:39:29<6:39:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8470/12318 [14:39:29<6:39:33,  6.23s/it, v_num=e4xv, train/loss"
+      "  v5-L96-D1024-E0_1  65%[============>       ]   1.72G  40.9MB/s    eta 24s    "
      ]
     },
     {
@@ -101968,8 +1960,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8471/12318 [14:39:34<6:39:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8471/12318 [14:39:34<6:39:27,  6.23s/it, v_num=e4xv, train/loss"
+      " v5-L96-D1024-E0_1-  65%[============>       ]   1.73G  40.6MB/s    eta 24s    "
      ]
     },
     {
@@ -101977,8 +1968,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8472/12318 [14:39:40<6:39:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8472/12318 [14:39:40<6:39:20,  6.23s/it, v_num=e4xv, train/loss"
+      "v5-L96-D1024-E0_1-m  65%[============>       ]   1.73G  40.4MB/s    eta 24s    "
      ]
     },
     {
@@ -101986,8 +1976,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8473/12318 [14:39:45<6:39:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8473/12318 [14:39:45<6:39:13,  6.23s/it, v_num=e4xv, train/loss"
+      "5-L96-D1024-E0_1-me  66%[============>       ]   1.74G  39.9MB/s    eta 23s    "
      ]
     },
     {
@@ -101995,8 +1984,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8474/12318 [14:39:50<6:39:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8474/12318 [14:39:50<6:39:06,  6.23s/it, v_num=e4xv, train/loss"
+      "-L96-D1024-E0_1-mem  66%[============>       ]   1.75G  39.8MB/s    eta 23s    "
      ]
     },
     {
@@ -102004,8 +1992,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8475/12318 [14:39:56<6:39:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8475/12318 [14:39:56<6:39:00,  6.23s/it, v_num=e4xv, train/loss"
+      "L96-D1024-E0_1-mem-  66%[============>       ]   1.75G  37.0MB/s    eta 23s    "
      ]
     },
     {
@@ -102013,8 +2000,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8476/12318 [14:40:03<6:38:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8476/12318 [14:40:03<6:38:54,  6.23s/it, v_num=e4xv, train/loss"
+      "96-D1024-E0_1-mem-c  67%[============>       ]   1.76G  39.3MB/s    eta 23s    "
      ]
     },
     {
@@ -102022,8 +2008,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8477/12318 [14:40:09<6:38:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8477/12318 [14:40:09<6:38:48,  6.23s/it, v_num=e4xv, train/loss"
+      "6-D1024-E0_1-mem-ct  67%[============>       ]   1.77G  39.7MB/s    eta 23s    "
      ]
     },
     {
@@ -102031,8 +2016,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8478/12318 [14:40:12<6:38:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8478/12318 [14:40:12<6:38:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-D1024-E0_1-mem-ctx  67%[============>       ]   1.78G  38.8MB/s    eta 22s    "
      ]
     },
     {
@@ -102040,8 +2024,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8479/12318 [14:40:16<6:38:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8479/12318 [14:40:16<6:38:33,  6.23s/it, v_num=e4xv, train/loss"
+      "D1024-E0_1-mem-ctx-  67%[============>       ]   1.79G  38.9MB/s    eta 22s    "
      ]
     },
     {
@@ -102049,8 +2032,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8480/12318 [14:40:55<6:38:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8480/12318 [14:40:55<6:38:42,  6.23s/it, v_num=e4xv, train/loss"
+      "1024-E0_1-mem-ctx-4  68%[============>       ]   1.79G  38.7MB/s    eta 22s    "
      ]
     },
     {
@@ -102058,8 +2040,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8481/12318 [14:40:59<6:38:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8481/12318 [14:40:59<6:38:34,  6.23s/it, v_num=e4xv, train/loss"
+      "024-E0_1-mem-ctx-4k  68%[============>       ]   1.81G  39.0MB/s    eta 22s    "
      ]
     },
     {
@@ -102067,8 +2048,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8482/12318 [14:41:07<6:38:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8482/12318 [14:41:07<6:38:29,  6.23s/it, v_num=e4xv, train/loss"
+      "24-E0_1-mem-ctx-4k.  68%[============>       ]   1.81G  39.4MB/s    eta 22s    "
      ]
     },
     {
@@ -102076,8 +2056,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8483/12318 [14:41:14<6:38:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8483/12318 [14:41:14<6:38:23,  6.23s/it, v_num=e4xv, train/loss"
+      "4-E0_1-mem-ctx-4k.p  69%[============>       ]   1.82G  39.3MB/s    eta 21s    "
      ]
     },
     {
@@ -102085,8 +2064,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8484/12318 [14:41:17<6:38:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8484/12318 [14:41:17<6:38:15,  6.23s/it, v_num=e4xv, train/loss"
+      "-E0_1-mem-ctx-4k.pt  69%[============>       ]   1.83G  38.9MB/s    eta 21s    "
      ]
     },
     {
@@ -102094,8 +2072,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8485/12318 [14:41:25<6:38:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8485/12318 [14:41:25<6:38:10,  6.23s/it, v_num=e4xv, train/loss"
+      "E0_1-mem-ctx-4k.pth  69%[============>       ]   1.84G  39.5MB/s    eta 21s    "
      ]
     },
     {
@@ -102103,8 +2080,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8486/12318 [14:41:32<6:38:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8486/12318 [14:41:32<6:38:04,  6.23s/it, v_num=e4xv, train/loss"
+      "0_1-mem-ctx-4k.pth   70%[=============>      ]   1.85G  38.9MB/s    eta 21s    "
      ]
     },
     {
@@ -102112,8 +2088,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8487/12318 [14:41:39<6:37:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8487/12318 [14:41:39<6:37:58,  6.23s/it, v_num=e4xv, train/loss"
+      "_1-mem-ctx-4k.pth    70%[=============>      ]   1.85G  39.1MB/s    eta 21s    "
      ]
     },
     {
@@ -102121,8 +2096,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8488/12318 [14:41:46<6:37:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8488/12318 [14:41:46<6:37:52,  6.23s/it, v_num=e4xv, train/loss"
+      "1-mem-ctx-4k.pth     70%[=============>      ]   1.86G  38.3MB/s    eta 20s    "
      ]
     },
     {
@@ -102130,8 +2104,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8489/12318 [14:41:51<6:37:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8489/12318 [14:41:51<6:37:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-mem-ctx-4k.pth      70%[=============>      ]   1.87G  38.7MB/s    eta 20s    "
      ]
     },
     {
@@ -102139,8 +2112,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8490/12318 [14:41:56<6:37:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8490/12318 [14:41:56<6:37:38,  6.23s/it, v_num=e4xv, train/loss"
+      "mem-ctx-4k.pth       71%[=============>      ]   1.87G  38.1MB/s    eta 20s    "
      ]
     },
     {
@@ -102148,8 +2120,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8491/12318 [14:42:04<6:37:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8491/12318 [14:42:04<6:37:33,  6.23s/it, v_num=e4xv, train/loss"
+      "em-ctx-4k.pth        71%[=============>      ]   1.88G  38.3MB/s    eta 20s    "
      ]
     },
     {
@@ -102157,8 +2128,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8492/12318 [14:42:13<6:37:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8492/12318 [14:42:13<6:37:28,  6.23s/it, v_num=e4xv, train/loss"
+      "m-ctx-4k.pth         71%[=============>      ]   1.89G  39.9MB/s    eta 20s    "
      ]
     },
     {
@@ -102166,8 +2136,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8493/12318 [14:42:18<6:37:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8493/12318 [14:42:18<6:37:21,  6.23s/it, v_num=e4xv, train/loss"
+      "-ctx-4k.pth          72%[=============>      ]   1.90G  38.3MB/s    eta 19s    "
      ]
     },
     {
@@ -102175,8 +2144,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8494/12318 [14:42:25<6:37:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8494/12318 [14:42:25<6:37:16,  6.23s/it, v_num=e4xv, train/loss"
+      "ctx-4k.pth           72%[=============>      ]   1.91G  38.7MB/s    eta 19s    "
      ]
     },
     {
@@ -102184,8 +2152,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8495/12318 [14:42:32<6:37:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8495/12318 [14:42:32<6:37:10,  6.23s/it, v_num=e4xv, train/loss"
+      "tx-4k.pth            72%[=============>      ]   1.91G  38.6MB/s    eta 19s    "
      ]
     },
     {
@@ -102193,8 +2160,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8496/12318 [14:42:35<6:37:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8496/12318 [14:42:35<6:37:02,  6.23s/it, v_num=e4xv, train/loss"
+      "x-4k.pth             73%[=============>      ]   1.92G  39.5MB/s    eta 19s    "
      ]
     },
     {
@@ -102202,8 +2168,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8497/12318 [14:42:40<6:36:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8497/12318 [14:42:40<6:36:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-4k.pth              73%[=============>      ]   1.93G  39.2MB/s    eta 19s    "
      ]
     },
     {
@@ -102211,8 +2176,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8498/12318 [14:42:47<6:36:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8498/12318 [14:42:47<6:36:49,  6.23s/it, v_num=e4xv, train/loss"
+      "4k.pth               73%[=============>      ]   1.94G  38.9MB/s    eta 18s    "
      ]
     },
     {
@@ -102220,8 +2184,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8499/12318 [14:42:51<6:36:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8499/12318 [14:42:51<6:36:42,  6.23s/it, v_num=e4xv, train/loss"
+      "k.pth                74%[=============>      ]   1.95G  39.1MB/s    eta 18s    "
      ]
     },
     {
@@ -102229,8 +2192,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8500/12318 [14:42:53<6:36:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8500/12318 [14:42:53<6:36:34,  6.23s/it, v_num=e4xv, train/loss"
+      ".pth                 74%[=============>      ]   1.96G  39.1MB/s    eta 18s    "
      ]
     },
     {
@@ -102238,8 +2200,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8501/12318 [14:42:59<6:36:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8501/12318 [14:42:59<6:36:28,  6.23s/it, v_num=e4xv, train/loss"
+      "pth                  74%[=============>      ]   1.97G  39.4MB/s    eta 18s    "
      ]
     },
     {
@@ -102247,8 +2208,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8502/12318 [14:43:06<6:36:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8502/12318 [14:43:06<6:36:22,  6.23s/it, v_num=e4xv, train/loss"
+      "th                   75%[==============>     ]   1.97G  38.7MB/s    eta 18s    "
      ]
     },
     {
@@ -102256,8 +2216,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8503/12318 [14:43:07<6:36:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8503/12318 [14:43:07<6:36:13,  6.23s/it, v_num=e4xv, train/loss"
+      "h                    75%[==============>     ]   1.98G  39.3MB/s    eta 17s    "
      ]
     },
     {
@@ -102265,8 +2224,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8504/12318 [14:43:16<6:36:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8504/12318 [14:43:16<6:36:08,  6.23s/it, v_num=e4xv, train/loss"
+      "                     75%[==============>     ]   1.99G  40.3MB/s    eta 17s    "
      ]
     },
     {
@@ -102274,8 +2232,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8505/12318 [14:43:20<6:36:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8505/12318 [14:43:20<6:36:01,  6.23s/it, v_num=e4xv, train/loss"
+      "                  v  76%[==============>     ]   2.00G  40.0MB/s    eta 17s    "
      ]
     },
     {
@@ -102283,8 +2240,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8506/12318 [14:43:23<6:35:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8506/12318 [14:43:23<6:35:53,  6.23s/it, v_num=e4xv, train/loss"
+      "                 v5  76%[==============>     ]   2.01G  40.8MB/s    eta 17s    "
      ]
     },
     {
@@ -102292,8 +2248,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8507/12318 [14:43:31<6:35:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8507/12318 [14:43:31<6:35:48,  6.23s/it, v_num=e4xv, train/loss"
+      "                v5-  76%[==============>     ]   2.02G  40.8MB/s    eta 17s    "
      ]
     },
     {
@@ -102301,8 +2256,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8508/12318 [14:43:36<6:35:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8508/12318 [14:43:36<6:35:41,  6.23s/it, v_num=e4xv, train/loss"
+      "               v5-L  77%[==============>     ]   2.03G  40.8MB/s    eta 16s    "
      ]
     },
     {
@@ -102310,8 +2264,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8509/12318 [14:43:38<6:35:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8509/12318 [14:43:38<6:35:33,  6.23s/it, v_num=e4xv, train/loss"
+      "              v5-L9  77%[==============>     ]   2.03G  41.2MB/s    eta 16s    "
      ]
     },
     {
@@ -102319,8 +2272,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8510/12318 [14:43:42<6:35:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8510/12318 [14:43:42<6:35:26,  6.23s/it, v_num=e4xv, train/loss"
+      "             v5-L96  77%[==============>     ]   2.04G  41.2MB/s    eta 16s    "
      ]
     },
     {
@@ -102328,8 +2280,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8511/12318 [14:43:49<6:35:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8511/12318 [14:43:49<6:35:20,  6.23s/it, v_num=e4xv, train/loss"
+      "            v5-L96-  77%[==============>     ]   2.05G  41.8MB/s    eta 16s    "
      ]
     },
     {
@@ -102337,8 +2288,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8512/12318 [14:44:06<6:35:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8512/12318 [14:44:06<6:35:18,  6.23s/it, v_num=e4xv, train/loss"
+      "           v5-L96-D  78%[==============>     ]   2.06G  40.4MB/s    eta 16s    "
      ]
     },
     {
@@ -102346,8 +2296,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8513/12318 [14:44:15<6:35:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8513/12318 [14:44:15<6:35:13,  6.23s/it, v_num=e4xv, train/loss"
+      "          v5-L96-D1  78%[==============>     ]   2.06G  39.9MB/s    eta 15s    "
      ]
     },
     {
@@ -102355,8 +2304,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8514/12318 [14:44:21<6:35:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8514/12318 [14:44:21<6:35:07,  6.23s/it, v_num=e4xv, train/loss"
+      "         v5-L96-D10  78%[==============>     ]   2.08G  41.5MB/s    eta 15s    "
      ]
     },
     {
@@ -102364,8 +2312,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8515/12318 [14:44:26<6:35:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8515/12318 [14:44:26<6:35:00,  6.23s/it, v_num=e4xv, train/loss"
+      "        v5-L96-D102  79%[==============>     ]   2.08G  40.8MB/s    eta 15s    "
      ]
     },
     {
@@ -102373,8 +2320,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8516/12318 [14:44:31<6:34:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8516/12318 [14:44:31<6:34:53,  6.23s/it, v_num=e4xv, train/loss"
+      "       v5-L96-D1024  79%[==============>     ]   2.09G  40.5MB/s    eta 15s    "
      ]
     },
     {
@@ -102382,8 +2328,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8517/12318 [14:44:36<6:34:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8517/12318 [14:44:36<6:34:47,  6.23s/it, v_num=e4xv, train/loss"
+      "      v5-L96-D1024-  79%[==============>     ]   2.10G  39.8MB/s    eta 15s    "
      ]
     },
     {
@@ -102391,8 +2336,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8518/12318 [14:44:44<6:34:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8518/12318 [14:44:44<6:34:41,  6.23s/it, v_num=e4xv, train/loss"
+      "     v5-L96-D1024-E  80%[===============>    ]   2.11G  40.0MB/s    eta 14s    "
      ]
     },
     {
@@ -102400,8 +2344,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8519/12318 [14:44:47<6:34:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8519/12318 [14:44:47<6:34:34,  6.23s/it, v_num=e4xv, train/loss"
+      "    v5-L96-D1024-E0  80%[===============>    ]   2.12G  39.9MB/s    eta 14s    "
      ]
     },
     {
@@ -102409,8 +2352,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8520/12318 [14:44:56<6:34:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8520/12318 [14:44:56<6:34:28,  6.23s/it, v_num=e4xv, train/loss"
+      "   v5-L96-D1024-E0_  80%[===============>    ]   2.12G  40.2MB/s    eta 14s    "
      ]
     },
     {
@@ -102418,8 +2360,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8521/12318 [14:45:03<6:34:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8521/12318 [14:45:03<6:34:23,  6.23s/it, v_num=e4xv, train/loss"
+      "  v5-L96-D1024-E0_1  81%[===============>    ]   2.13G  39.1MB/s    eta 14s    "
      ]
     },
     {
@@ -102427,8 +2368,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8522/12318 [14:45:05<6:34:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8522/12318 [14:45:05<6:34:14,  6.23s/it, v_num=e4xv, train/loss"
+      " v5-L96-D1024-E0_1-  81%[===============>    ]   2.14G  39.9MB/s    eta 14s    "
      ]
     },
     {
@@ -102436,7 +2376,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8523/12318 [14:45:10<6:34:08,  6.23s/it, v_num=e4xv, train/loss"
+      "v5-L96-D1024-E0_1-m  81%[===============>    ]   2.15G  39.5MB/s    eta 13s    "
      ]
     },
     {
@@ -102444,7 +2384,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8523/12318 [14:45:10<6:34:08,  6.23s/it, v_num=e4xv, train/loss"
+      "5-L96-D1024-E0_1-me  82%[===============>    ]   2.16G  39.8MB/s    eta 13s    "
      ]
     },
     {
@@ -102452,8 +2392,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8524/12318 [14:45:11<6:33:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8524/12318 [14:45:11<6:33:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-L96-D1024-E0_1-mem  82%[===============>    ]   2.17G  39.3MB/s    eta 13s    "
      ]
     },
     {
@@ -102461,8 +2400,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8525/12318 [14:45:20<6:33:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8525/12318 [14:45:20<6:33:54,  6.23s/it, v_num=e4xv, train/loss"
+      "L96-D1024-E0_1-mem-  82%[===============>    ]   2.17G  39.2MB/s    eta 13s    "
      ]
     },
     {
@@ -102470,8 +2408,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8526/12318 [14:45:29<6:33:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8526/12318 [14:45:29<6:33:49,  6.23s/it, v_num=e4xv, train/loss"
+      "96-D1024-E0_1-mem-c  82%[===============>    ]   2.18G  39.5MB/s    eta 13s    "
      ]
     },
     {
@@ -102479,8 +2416,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8527/12318 [14:45:33<6:33:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8527/12318 [14:45:33<6:33:42,  6.23s/it, v_num=e4xv, train/loss"
+      "6-D1024-E0_1-mem-ct  83%[===============>    ]   2.19G  39.7MB/s    eta 11s    "
      ]
     },
     {
@@ -102488,8 +2424,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8528/12318 [14:45:41<6:33:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8528/12318 [14:45:41<6:33:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-D1024-E0_1-mem-ctx  83%[===============>    ]   2.20G  40.6MB/s    eta 11s    "
      ]
     },
     {
@@ -102497,8 +2432,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8529/12318 [14:45:46<6:33:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8529/12318 [14:45:46<6:33:30,  6.23s/it, v_num=e4xv, train/loss"
+      "D1024-E0_1-mem-ctx-  83%[===============>    ]   2.21G  39.8MB/s    eta 11s    "
      ]
     },
     {
@@ -102506,8 +2440,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8530/12318 [14:45:49<6:33:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8530/12318 [14:45:49<6:33:22,  6.23s/it, v_num=e4xv, train/loss"
+      "1024-E0_1-mem-ctx-4  84%[===============>    ]   2.21G  39.9MB/s    eta 11s    "
      ]
     },
     {
@@ -102515,8 +2448,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8531/12318 [14:45:50<6:33:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8531/12318 [14:45:50<6:33:14,  6.23s/it, v_num=e4xv, train/loss"
+      "024-E0_1-mem-ctx-4k  84%[===============>    ]   2.22G  40.4MB/s    eta 11s    "
      ]
     },
     {
@@ -102524,8 +2456,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8532/12318 [14:45:56<6:33:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8532/12318 [14:45:56<6:33:07,  6.23s/it, v_num=e4xv, train/loss"
+      "24-E0_1-mem-ctx-4k.  84%[===============>    ]   2.23G  40.4MB/s    eta 10s    "
      ]
     },
     {
@@ -102533,8 +2464,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8533/12318 [14:46:05<6:33:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8533/12318 [14:46:05<6:33:02,  6.23s/it, v_num=e4xv, train/loss"
+      "4-E0_1-mem-ctx-4k.p  85%[================>   ]   2.24G  40.8MB/s    eta 10s    "
      ]
     },
     {
@@ -102542,8 +2472,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8534/12318 [14:46:09<6:32:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8534/12318 [14:46:09<6:32:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-E0_1-mem-ctx-4k.pt  85%[================>   ]   2.25G  40.5MB/s    eta 10s    "
      ]
     },
     {
@@ -102551,8 +2480,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8535/12318 [14:46:15<6:32:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8535/12318 [14:46:15<6:32:49,  6.23s/it, v_num=e4xv, train/loss"
+      "E0_1-mem-ctx-4k.pth  85%[================>   ]   2.26G  40.4MB/s    eta 10s    "
      ]
     },
     {
@@ -102560,8 +2488,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8536/12318 [14:46:21<6:32:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8536/12318 [14:46:21<6:32:43,  6.23s/it, v_num=e4xv, train/loss"
+      "0_1-mem-ctx-4k.pth   86%[================>   ]   2.27G  40.8MB/s    eta 10s    "
      ]
     },
     {
@@ -102569,8 +2496,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8537/12318 [14:46:23<6:32:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8537/12318 [14:46:23<6:32:34,  6.23s/it, v_num=e4xv, train/loss"
+      "_1-mem-ctx-4k.pth    86%[================>   ]   2.27G  41.0MB/s    eta 9s     "
      ]
     },
     {
@@ -102578,8 +2504,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8538/12318 [14:46:31<6:32:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8538/12318 [14:46:31<6:32:29,  6.23s/it, v_num=e4xv, train/loss"
+      "1-mem-ctx-4k.pth     86%[================>   ]   2.28G  40.1MB/s    eta 9s     "
      ]
     },
     {
@@ -102587,8 +2512,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8539/12318 [14:46:36<6:32:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8539/12318 [14:46:36<6:32:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-mem-ctx-4k.pth      87%[================>   ]   2.29G  40.1MB/s    eta 9s     "
      ]
     },
     {
@@ -102596,8 +2520,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8540/12318 [14:46:41<6:32:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8540/12318 [14:46:41<6:32:15,  6.23s/it, v_num=e4xv, train/loss"
+      "mem-ctx-4k.pth       87%[================>   ]   2.30G  40.0MB/s    eta 9s     "
      ]
     },
     {
@@ -102605,8 +2528,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8541/12318 [14:46:45<6:32:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8541/12318 [14:46:45<6:32:08,  6.23s/it, v_num=e4xv, train/loss"
+      "em-ctx-4k.pth        87%[================>   ]   2.31G  40.4MB/s    eta 9s     "
      ]
     },
     {
@@ -102614,8 +2536,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8542/12318 [14:46:53<6:32:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8542/12318 [14:46:53<6:32:02,  6.23s/it, v_num=e4xv, train/loss"
+      "m-ctx-4k.pth         87%[================>   ]   2.31G  40.1MB/s    eta 8s     "
      ]
     },
     {
@@ -102623,7 +2544,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8543/12318 [14:46:54<6:31:54,  6.23s/it, v_num=e4xv, train/loss"
+      "-ctx-4k.pth          88%[================>   ]   2.32G  39.9MB/s    eta 8s     "
      ]
     },
     {
@@ -102631,7 +2552,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8543/12318 [14:46:54<6:31:54,  6.23s/it, v_num=e4xv, train/loss"
+      "ctx-4k.pth           88%[================>   ]   2.33G  40.0MB/s    eta 8s     "
      ]
     },
     {
@@ -102639,8 +2560,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8544/12318 [14:47:16<6:31:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8544/12318 [14:47:16<6:31:55,  6.23s/it, v_num=e4xv, train/loss"
+      "tx-4k.pth            88%[================>   ]   2.34G  39.2MB/s    eta 8s     "
      ]
     },
     {
@@ -102648,8 +2568,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8545/12318 [14:47:19<6:31:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8545/12318 [14:47:19<6:31:47,  6.23s/it, v_num=e4xv, train/loss"
+      "x-4k.pth             89%[================>   ]   2.35G  39.7MB/s    eta 8s     "
      ]
     },
     {
@@ -102657,8 +2576,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8546/12318 [14:47:23<6:31:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8546/12318 [14:47:23<6:31:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-4k.pth              89%[================>   ]   2.36G  39.4MB/s    eta 7s     "
      ]
     },
     {
@@ -102666,8 +2584,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8547/12318 [14:47:30<6:31:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8547/12318 [14:47:30<6:31:34,  6.23s/it, v_num=e4xv, train/loss"
+      "4k.pth               89%[================>   ]   2.36G  39.4MB/s    eta 7s     "
      ]
     },
     {
@@ -102675,8 +2592,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8548/12318 [14:47:35<6:31:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8548/12318 [14:47:35<6:31:27,  6.23s/it, v_num=e4xv, train/loss"
+      "k.pth                90%[=================>  ]   2.37G  39.6MB/s    eta 7s     "
      ]
     },
     {
@@ -102684,8 +2600,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8549/12318 [14:47:38<6:31:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8549/12318 [14:47:38<6:31:20,  6.23s/it, v_num=e4xv, train/loss"
+      ".pth                 90%[=================>  ]   2.38G  39.4MB/s    eta 7s     "
      ]
     },
     {
@@ -102693,8 +2608,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8550/12318 [14:47:43<6:31:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8550/12318 [14:47:43<6:31:13,  6.23s/it, v_num=e4xv, train/loss"
+      "pth                  90%[=================>  ]   2.39G  39.2MB/s    eta 7s     "
      ]
     },
     {
@@ -102702,8 +2616,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8551/12318 [14:47:51<6:31:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8551/12318 [14:47:51<6:31:07,  6.23s/it, v_num=e4xv, train/loss"
+      "th                   91%[=================>  ]   2.40G  39.2MB/s    eta 6s     "
      ]
     },
     {
@@ -102711,8 +2624,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8552/12318 [14:47:56<6:31:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8552/12318 [14:47:56<6:31:00,  6.23s/it, v_num=e4xv, train/loss"
+      "h                    91%[=================>  ]   2.41G  39.4MB/s    eta 6s     "
      ]
     },
     {
@@ -102720,8 +2632,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8553/12318 [14:48:03<6:30:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8553/12318 [14:48:03<6:30:55,  6.23s/it, v_num=e4xv, train/loss"
+      "                     91%[=================>  ]   2.41G  40.0MB/s    eta 6s     "
      ]
     },
     {
@@ -102729,8 +2640,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8554/12318 [14:48:11<6:30:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8554/12318 [14:48:11<6:30:49,  6.23s/it, v_num=e4xv, train/loss"
+      "                  v  92%[=================>  ]   2.42G  39.8MB/s    eta 6s     "
      ]
     },
     {
@@ -102738,8 +2648,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8555/12318 [14:48:14<6:30:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8555/12318 [14:48:14<6:30:42,  6.23s/it, v_num=e4xv, train/loss"
+      "                 v5  92%[=================>  ]   2.43G  39.7MB/s    eta 6s     "
      ]
     },
     {
@@ -102747,8 +2656,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8556/12318 [14:48:21<6:30:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8556/12318 [14:48:21<6:30:36,  6.23s/it, v_num=e4xv, train/loss"
+      "                v5-  92%[=================>  ]   2.44G  39.5MB/s    eta 5s     "
      ]
     },
     {
@@ -102756,8 +2664,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8557/12318 [14:48:27<6:30:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8557/12318 [14:48:27<6:30:29,  6.23s/it, v_num=e4xv, train/loss"
+      "               v5-L  92%[=================>  ]   2.45G  40.6MB/s    eta 5s     "
      ]
     },
     {
@@ -102765,8 +2672,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8558/12318 [14:48:36<6:30:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8558/12318 [14:48:36<6:30:24,  6.23s/it, v_num=e4xv, train/loss"
+      "              v5-L9  93%[=================>  ]   2.45G  39.8MB/s    eta 5s     "
      ]
     },
     {
@@ -102774,8 +2680,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8559/12318 [14:48:39<6:30:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8559/12318 [14:48:39<6:30:17,  6.23s/it, v_num=e4xv, train/loss"
+      "             v5-L96  93%[=================>  ]   2.46G  39.0MB/s    eta 5s     "
      ]
     },
     {
@@ -102783,8 +2688,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8560/12318 [14:48:46<6:30:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8560/12318 [14:48:46<6:30:11,  6.23s/it, v_num=e4xv, train/loss"
+      "            v5-L96-  93%[=================>  ]   2.46G  37.8MB/s    eta 5s     "
      ]
     },
     {
@@ -102792,8 +2696,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  69%|▋| 8561/12318 [14:48:50<6:30:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  69%|▋| 8561/12318 [14:48:50<6:30:03,  6.23s/it, v_num=e4xv, train/loss"
+      "           v5-L96-D  94%[=================>  ]   2.48G  40.1MB/s    eta 4s     "
      ]
     },
     {
@@ -102801,8 +2704,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8562/12318 [14:48:55<6:29:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8562/12318 [14:48:55<6:29:57,  6.23s/it, v_num=e4xv, train/loss"
+      "          v5-L96-D1  94%[=================>  ]   2.49G  40.3MB/s    eta 4s     "
      ]
     },
     {
@@ -102810,8 +2712,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8563/12318 [14:48:58<6:29:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8563/12318 [14:48:58<6:29:49,  6.23s/it, v_num=e4xv, train/loss"
+      "         v5-L96-D10  94%[=================>  ]   2.50G  40.4MB/s    eta 4s     "
      ]
     },
     {
@@ -102819,8 +2720,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8564/12318 [14:49:06<6:29:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8564/12318 [14:49:06<6:29:44,  6.23s/it, v_num=e4xv, train/loss"
+      "        v5-L96-D102  95%[==================> ]   2.51G  40.3MB/s    eta 4s     "
      ]
     },
     {
@@ -102828,8 +2728,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8565/12318 [14:49:15<6:29:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8565/12318 [14:49:15<6:29:39,  6.23s/it, v_num=e4xv, train/loss"
+      "       v5-L96-D1024  95%[==================> ]   2.51G  40.4MB/s    eta 4s     "
      ]
     },
     {
@@ -102837,8 +2736,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8566/12318 [14:49:22<6:29:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8566/12318 [14:49:22<6:29:33,  6.23s/it, v_num=e4xv, train/loss"
+      "      v5-L96-D1024-  95%[==================> ]   2.52G  41.1MB/s    eta 3s     "
      ]
     },
     {
@@ -102846,8 +2744,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8567/12318 [14:49:27<6:29:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8567/12318 [14:49:27<6:29:26,  6.23s/it, v_num=e4xv, train/loss"
+      "     v5-L96-D1024-E  96%[==================> ]   2.53G  40.8MB/s    eta 3s     "
      ]
     },
     {
@@ -102855,8 +2752,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8568/12318 [14:49:32<6:29:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8568/12318 [14:49:32<6:29:19,  6.23s/it, v_num=e4xv, train/loss"
+      "    v5-L96-D1024-E0  96%[==================> ]   2.54G  40.5MB/s    eta 3s     "
      ]
     },
     {
@@ -102864,8 +2760,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8569/12318 [14:49:35<6:29:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8569/12318 [14:49:35<6:29:12,  6.23s/it, v_num=e4xv, train/loss"
+      "   v5-L96-D1024-E0_  96%[==================> ]   2.55G  40.5MB/s    eta 3s     "
      ]
     },
     {
@@ -102873,8 +2768,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8570/12318 [14:49:41<6:29:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8570/12318 [14:49:41<6:29:05,  6.23s/it, v_num=e4xv, train/loss"
+      "  v5-L96-D1024-E0_1  97%[==================> ]   2.55G  40.3MB/s    eta 3s     "
      ]
     },
     {
@@ -102882,8 +2776,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8571/12318 [14:49:45<6:28:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8571/12318 [14:49:45<6:28:58,  6.23s/it, v_num=e4xv, train/loss"
+      " v5-L96-D1024-E0_1-  97%[==================> ]   2.56G  40.3MB/s    eta 2s     "
      ]
     },
     {
@@ -102891,8 +2784,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8572/12318 [14:49:46<6:28:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8572/12318 [14:49:46<6:28:50,  6.23s/it, v_num=e4xv, train/loss"
+      "v5-L96-D1024-E0_1-m  97%[==================> ]   2.57G  40.5MB/s    eta 2s     "
      ]
     },
     {
@@ -102900,8 +2792,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8573/12318 [14:49:52<6:28:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8573/12318 [14:49:52<6:28:43,  6.23s/it, v_num=e4xv, train/loss"
+      "5-L96-D1024-E0_1-me  98%[==================> ]   2.58G  40.9MB/s    eta 2s     "
      ]
     },
     {
@@ -102909,8 +2800,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8574/12318 [14:49:59<6:28:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8574/12318 [14:49:59<6:28:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-L96-D1024-E0_1-mem  98%[==================> ]   2.59G  40.8MB/s    eta 2s     "
      ]
     },
     {
@@ -102918,8 +2808,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8575/12318 [14:50:06<6:28:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8575/12318 [14:50:06<6:28:31,  6.23s/it, v_num=e4xv, train/loss"
+      "L96-D1024-E0_1-mem-  98%[==================> ]   2.60G  42.5MB/s    eta 2s     "
      ]
     },
     {
@@ -102927,8 +2816,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8576/12318 [14:50:40<6:28:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8576/12318 [14:50:40<6:28:37,  6.23s/it, v_num=e4xv, train/loss"
+      "96-D1024-E0_1-mem-c  99%[==================> ]   2.61G  42.8MB/s    eta 1s     "
      ]
     },
     {
@@ -102936,8 +2824,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8577/12318 [14:50:48<6:28:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8577/12318 [14:50:48<6:28:32,  6.23s/it, v_num=e4xv, train/loss"
+      "6-D1024-E0_1-mem-ct  99%[==================> ]   2.61G  40.8MB/s    eta 1s     "
      ]
     },
     {
@@ -102945,8 +2832,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8578/12318 [14:50:55<6:28:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8578/12318 [14:50:55<6:28:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-D1024-E0_1-mem-ctx  99%[==================> ]   2.62G  40.9MB/s    eta 1s     "
      ]
     },
     {
@@ -102954,8 +2840,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8579/12318 [14:51:03<6:28:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8579/12318 [14:51:03<6:28:21,  6.23s/it, v_num=e4xv, train/loss"
+      "D1024-E0_1-mem-ctx-  99%[==================> ]   2.63G  40.8MB/s    eta 1s     "
      ]
     },
     {
@@ -102963,9843 +2848,7796 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  70%|▋| 8580/12318 [14:51:09<6:28:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8580/12318 [14:51:09<6:28:14,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8581/12318 [14:51:18<6:28:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8581/12318 [14:51:18<6:28:09,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8582/12318 [14:51:22<6:28:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8582/12318 [14:51:22<6:28:02,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8583/12318 [14:51:30<6:27:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8583/12318 [14:51:30<6:27:56,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8584/12318 [14:51:36<6:27:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8584/12318 [14:51:36<6:27:50,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8585/12318 [14:51:41<6:27:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8585/12318 [14:51:41<6:27:43,  6.23s/it, v_num=e4xv, train/loss"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8586/12318 [14:51:48<6:27:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8586/12318 [14:51:48<6:27:38,  6.23s/it, v_num=e4xv, train/loss"
+      "v5-L96-D1024-E0_1-m 100%[===================>]   2.63G  41.1MB/s    in 68s     \r\n",
+      "\r\n",
+      "2023-09-02 06:17:41 (39.7 MB/s) - ‘v5-L96-D1024-E0_1-mem-ctx-4k.pth’ saved [2825976699/2825976699]\r\n",
+      "\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8587/12318 [14:51:52<6:27:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8587/12318 [14:51:52<6:27:30,  6.23s/it, v_num=e4xv, train/loss"
+      "total 2.7G\r\n",
+      "drwxr-xr-x  2 root root   54 Sep  2 06:16 .\r\n",
+      "drwxr-xr-x 19 root root 4.0K Sep  2 06:16 ..\r\n",
+      "-rw-r--r--  1 root root 2.7G Sep  2 05:37 v5-L96-D1024-E0_1-mem-ctx-4k.pth\r\n"
      ]
+    }
+   ],
+   "source": [
+    "# Download the model directly (stop gap till HF sync issues is resolved)\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-memory/{FILENAME_PREFIX}-mem-ctx-4k.pth\"\n",
+    "\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    ls -alh ."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "adf68d89",
+   "metadata": {
+    "papermill": {
+     "duration": 0.01742,
+     "end_time": "2023-09-02T06:17:41.804733",
+     "exception": false,
+     "start_time": "2023-09-02T06:17:41.787313",
+     "status": "completed"
     },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8588/12318 [14:52:01<6:27:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8588/12318 [14:52:01<6:27:25,  6.23s/it, v_num=e4xv, train/loss"
-     ]
+    "tags": []
+   },
+   "source": [
+    "## Tune 6 : Ramping up the ctx size (8192), memory training\n",
+    "\n",
+    "- Tune 6: Large ctx size (8192), Scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "51c58e54",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-02T06:17:41.841382Z",
+     "iopub.status.busy": "2023-09-02T06:17:41.841191Z",
+     "iopub.status.idle": "2023-09-02T06:17:48.969203Z",
+     "shell.execute_reply": "2023-09-02T06:17:48.967550Z"
     },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8589/12318 [14:52:09<6:27:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8589/12318 [14:52:09<6:27:20,  6.23s/it, v_num=e4xv, train/loss"
-     ]
+    "papermill": {
+     "duration": 7.217868,
+     "end_time": "2023-09-02T06:17:49.040024",
+     "exception": false,
+     "start_time": "2023-09-02T06:17:41.822156",
+     "status": "completed"
     },
+    "tags": []
+   },
+   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8590/12318 [14:52:17<6:27:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8590/12318 [14:52:17<6:27:14,  6.23s/it, v_num=e4xv, train/loss"
+      "## Generating word reptition dataset ##\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8591/12318 [14:52:22<6:27:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8591/12318 [14:52:22<6:27:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2 max words, 50 samples - at ../dataset/word-2-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8592/12318 [14:52:29<6:27:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8592/12318 [14:52:29<6:27:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 10 max words, 50 samples - at ../dataset/gen-word-10-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8593/12318 [14:52:37<6:26:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8593/12318 [14:52:37<6:26:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 35 max words, 50 samples - at ../dataset/gen-word-35-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8594/12318 [14:52:41<6:26:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8594/12318 [14:52:41<6:26:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 566 samples (1 token repeat) - 5 max words - at ../dataset/shuffle-word-5-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8595/12318 [14:52:47<6:26:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8595/12318 [14:52:47<6:26:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5 max words, 50 samples - at ../dataset/gen-word-5-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8596/12318 [14:52:54<6:26:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8596/12318 [14:52:54<6:26:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 60 max words, 50 samples - at ../dataset/gen-word-60-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8597/12318 [14:53:01<6:26:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8597/12318 [14:53:01<6:26:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 30 max words, 50 samples - at ../dataset/gen-word-30-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8598/12318 [14:53:07<6:26:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8598/12318 [14:53:07<6:26:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 84 samples (1 token repeat) - 30 max words - at ../dataset/shuffle-word-30-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8599/12318 [14:53:12<6:26:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8599/12318 [14:53:12<6:26:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 77 samples (1 token repeat) - 35 max words - at ../dataset/shuffle-word-35-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8600/12318 [14:53:21<6:26:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8600/12318 [14:53:21<6:26:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 25 max words, 50 samples - at ../dataset/gen-word-25-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8601/12318 [14:53:26<6:26:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8601/12318 [14:53:26<6:26:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 45 max words, 50 samples - at ../dataset/gen-word-45-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8602/12318 [14:53:28<6:25:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8602/12318 [14:53:28<6:25:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 63 samples (1 token repeat) - 40 max words - at ../dataset/shuffle-word-40-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8603/12318 [14:53:31<6:25:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8603/12318 [14:53:31<6:25:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 50 max words, 50 samples - at ../dataset/gen-word-50-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8604/12318 [14:53:40<6:25:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 55 max words, 50 samples - at ../dataset/gen-word-55-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8604/12318 [14:53:40<6:25:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 15 max words, 50 samples - at ../dataset/gen-word-15-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8605/12318 [14:53:47<6:25:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8605/12318 [14:53:47<6:25:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 120 max words, 50 samples - at ../dataset/gen-word-120-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8606/12318 [14:53:48<6:25:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8606/12318 [14:53:48<6:25:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 129 samples (1 token repeat) - 20 max words - at ../dataset/shuffle-word-20-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8607/12318 [14:53:51<6:25:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 56 samples (1 token repeat) - 50 max words - at ../dataset/shuffle-word-50-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8607/12318 [14:53:51<6:25:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 264 samples (1 token repeat) - 10 max words - at ../dataset/shuffle-word-10-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8608/12318 [14:54:06<6:25:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8608/12318 [14:54:06<6:25:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 40 max words, 50 samples - at ../dataset/gen-word-40-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8609/12318 [14:54:14<6:25:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8609/12318 [14:54:14<6:25:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 189 samples (1 token repeat) - 15 max words - at ../dataset/shuffle-word-15-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8610/12318 [14:54:17<6:25:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8610/12318 [14:54:17<6:25:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 19 samples (1 token repeat) - 115 max words - at ../dataset/shuffle-word-115-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8611/12318 [14:54:23<6:25:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8611/12318 [14:54:23<6:25:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 58 samples (1 token repeat) - 45 max words - at ../dataset/shuffle-word-45-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8612/12318 [14:54:31<6:24:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8612/12318 [14:54:31<6:24:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 20 max words, 50 samples - at ../dataset/gen-word-20-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8613/12318 [14:54:37<6:24:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8613/12318 [14:54:37<6:24:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (1 token repeat) - 110 max words - at ../dataset/shuffle-word-110-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8614/12318 [14:54:39<6:24:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8614/12318 [14:54:39<6:24:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 220 max words - at ../dataset/shuffle-word-220-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8615/12318 [14:54:40<6:24:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8615/12318 [14:54:40<6:24:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 110 max words, 50 samples - at ../dataset/gen-word-110-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8616/12318 [14:54:45<6:24:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8616/12318 [14:54:45<6:24:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 50 samples (1 token repeat) - 55 max words - at ../dataset/shuffle-word-55-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8617/12318 [14:54:48<6:24:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8617/12318 [14:54:48<6:24:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 105 max words, 50 samples - at ../dataset/gen-word-105-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8618/12318 [14:54:57<6:24:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8618/12318 [14:54:57<6:24:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 85 max words, 50 samples - at ../dataset/gen-word-85-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8619/12318 [14:55:04<6:24:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8619/12318 [14:55:04<6:24:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 100 max words, 50 samples - at ../dataset/gen-word-100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8620/12318 [14:55:09<6:24:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8620/12318 [14:55:09<6:24:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 235 max words, 50 samples - at ../dataset/gen-word-235-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8621/12318 [14:55:15<6:23:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8621/12318 [14:55:15<6:23:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 19 samples (1 token repeat) - 120 max words - at ../dataset/shuffle-word-120-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8622/12318 [14:55:17<6:23:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8622/12318 [14:55:17<6:23:47,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 140 max words, 50 samples - at ../dataset/gen-word-140-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8623/12318 [14:55:25<6:23:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8623/12318 [14:55:25<6:23:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 125 max words, 50 samples - at ../dataset/gen-word-125-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8624/12318 [14:55:28<6:23:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8624/12318 [14:55:28<6:23:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 80 max words, 50 samples - at ../dataset/gen-word-80-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8625/12318 [14:55:31<6:23:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8625/12318 [14:55:31<6:23:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 95 max words, 50 samples - at ../dataset/gen-word-95-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8626/12318 [14:55:36<6:23:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8626/12318 [14:55:36<6:23:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 105 samples (1 token repeat) - 25 max words - at ../dataset/shuffle-word-25-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8627/12318 [14:55:43<6:23:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8627/12318 [14:55:43<6:23:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 215 max words - at ../dataset/shuffle-word-215-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8628/12318 [14:55:46<6:23:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8628/12318 [14:55:46<6:23:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 265 max words - at ../dataset/shuffle-word-265-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8629/12318 [14:55:49<6:22:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 21 samples (1 token repeat) - 105 max words - at ../dataset/shuffle-word-105-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8629/12318 [14:55:49<6:22:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 310 max words, 50 samples - at ../dataset/gen-word-310-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8630/12318 [14:55:57<6:22:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8630/12318 [14:55:57<6:22:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 70 max words, 50 samples - at ../dataset/gen-word-70-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8631/12318 [14:56:02<6:22:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8631/12318 [14:56:02<6:22:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 115 max words, 50 samples - at ../dataset/gen-word-115-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8632/12318 [14:56:05<6:22:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8632/12318 [14:56:05<6:22:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 590 max words - at ../dataset/shuffle-word-590-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8633/12318 [14:56:12<6:22:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8633/12318 [14:56:12<6:22:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 200 max words, 50 samples - at ../dataset/gen-word-200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8634/12318 [14:56:17<6:22:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8634/12318 [14:56:17<6:22:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 595 max words - at ../dataset/shuffle-word-595-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8635/12318 [14:56:26<6:22:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8635/12318 [14:56:26<6:22:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 255 max words - at ../dataset/shuffle-word-255-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8636/12318 [14:56:31<6:22:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8636/12318 [14:56:31<6:22:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 175 max words, 50 samples - at ../dataset/gen-word-175-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8637/12318 [14:56:40<6:22:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8637/12318 [14:56:40<6:22:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 26 samples (1 token repeat) - 95 max words - at ../dataset/shuffle-word-95-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8638/12318 [14:56:47<6:22:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8638/12318 [14:56:47<6:22:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 450 max words - at ../dataset/shuffle-word-450-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8639/12318 [14:56:52<6:21:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8639/12318 [14:56:52<6:21:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 270 max words, 50 samples - at ../dataset/gen-word-270-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8640/12318 [14:57:18<6:21:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8640/12318 [14:57:18<6:21:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 44 samples (1 token repeat) - 60 max words - at ../dataset/shuffle-word-60-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8641/12318 [14:57:23<6:21:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8641/12318 [14:57:23<6:21:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 415 max words, 50 samples - at ../dataset/gen-word-415-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8642/12318 [14:57:27<6:21:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 585 max words, 50 samples - at ../dataset/gen-word-585-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8642/12318 [14:57:27<6:21:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 350 max words - at ../dataset/shuffle-word-350-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8643/12318 [14:57:32<6:21:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8643/12318 [14:57:32<6:21:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 430 max words, 50 samples - at ../dataset/gen-word-430-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8644/12318 [14:57:37<6:21:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8644/12318 [14:57:37<6:21:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 17 samples (1 token repeat) - 135 max words - at ../dataset/shuffle-word-135-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8645/12318 [14:57:42<6:21:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8645/12318 [14:57:42<6:21:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 525 max words - at ../dataset/shuffle-word-525-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8646/12318 [14:57:47<6:21:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8646/12318 [14:57:47<6:21:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 530 max words - at ../dataset/shuffle-word-530-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8647/12318 [14:57:50<6:21:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8647/12318 [14:57:50<6:21:10,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 8 samples (1 token repeat) - 305 max words - at ../dataset/shuffle-word-305-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8648/12318 [14:57:55<6:21:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8648/12318 [14:57:55<6:21:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 11 samples (1 token repeat) - 210 max words - at ../dataset/shuffle-word-210-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8649/12318 [14:58:01<6:20:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8649/12318 [14:58:01<6:20:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 285 max words, 50 samples - at ../dataset/gen-word-285-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8650/12318 [14:58:07<6:20:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8650/12318 [14:58:07<6:20:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 330 max words, 50 samples - at ../dataset/gen-word-330-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8651/12318 [14:58:12<6:20:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8651/12318 [14:58:12<6:20:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 495 max words - at ../dataset/shuffle-word-495-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8652/12318 [14:58:17<6:20:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8652/12318 [14:58:17<6:20:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 400 max words, 50 samples - at ../dataset/gen-word-400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8653/12318 [14:58:22<6:20:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8653/12318 [14:58:22<6:20:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 240 max words - at ../dataset/shuffle-word-240-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8654/12318 [14:58:28<6:20:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8654/12318 [14:58:28<6:20:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 580 max words - at ../dataset/shuffle-word-580-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8655/12318 [14:58:32<6:20:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8655/12318 [14:58:32<6:20:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 520 max words, 50 samples - at ../dataset/gen-word-520-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8656/12318 [14:58:39<6:20:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8656/12318 [14:58:39<6:20:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 370 max words, 50 samples - at ../dataset/gen-word-370-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8657/12318 [14:58:47<6:20:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 425 max words, 50 samples - at ../dataset/gen-word-425-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8657/12318 [14:58:47<6:20:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 365 max words - at ../dataset/shuffle-word-365-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8658/12318 [14:58:52<6:19:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8658/12318 [14:58:52<6:19:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 225 max words - at ../dataset/shuffle-word-225-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8659/12318 [14:58:54<6:19:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8659/12318 [14:58:54<6:19:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 10 samples (1 token repeat) - 230 max words - at ../dataset/shuffle-word-230-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8660/12318 [14:59:02<6:19:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8660/12318 [14:59:02<6:19:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 290 max words, 50 samples - at ../dataset/gen-word-290-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8661/12318 [14:59:07<6:19:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8661/12318 [14:59:07<6:19:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 400 max words - at ../dataset/shuffle-word-400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8662/12318 [14:59:10<6:19:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 445 max words - at ../dataset/shuffle-word-445-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8662/12318 [14:59:10<6:19:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 180 max words, 50 samples - at ../dataset/gen-word-180-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8663/12318 [14:59:16<6:19:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8663/12318 [14:59:16<6:19:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 65 max words, 50 samples - at ../dataset/gen-word-65-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8664/12318 [14:59:24<6:19:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8664/12318 [14:59:24<6:19:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 545 max words - at ../dataset/shuffle-word-545-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8665/12318 [14:59:28<6:19:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8665/12318 [14:59:28<6:19:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 18 samples (1 token repeat) - 130 max words - at ../dataset/shuffle-word-130-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8666/12318 [14:59:35<6:19:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8666/12318 [14:59:35<6:19:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 560 max words, 50 samples - at ../dataset/gen-word-560-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8667/12318 [14:59:43<6:19:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8667/12318 [14:59:43<6:19:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 480 max words, 50 samples - at ../dataset/gen-word-480-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8668/12318 [14:59:48<6:18:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8668/12318 [14:59:48<6:18:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 32 samples (1 token repeat) - 85 max words - at ../dataset/shuffle-word-85-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8669/12318 [14:59:52<6:18:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8669/12318 [14:59:52<6:18:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 36 samples (1 token repeat) - 75 max words - at ../dataset/shuffle-word-75-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8670/12318 [15:00:01<6:18:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8670/12318 [15:00:01<6:18:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 155 max words - at ../dataset/shuffle-word-155-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8671/12318 [15:00:08<6:18:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8671/12318 [15:00:08<6:18:35,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 800 max words - at ../dataset/shuffle-word-800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8672/12318 [15:00:36<6:18:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8672/12318 [15:00:36<6:18:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 365 max words, 50 samples - at ../dataset/gen-word-365-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8673/12318 [15:00:38<6:18:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8673/12318 [15:00:38<6:18:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 600 max words - at ../dataset/shuffle-word-600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8674/12318 [15:00:47<6:18:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8674/12318 [15:00:47<6:18:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 18 samples (1 token repeat) - 125 max words - at ../dataset/shuffle-word-125-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8675/12318 [15:00:48<6:18:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8675/12318 [15:00:48<6:18:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 140 max words - at ../dataset/shuffle-word-140-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8676/12318 [15:00:57<6:18:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8676/12318 [15:00:57<6:18:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 155 max words, 50 samples - at ../dataset/gen-word-155-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8677/12318 [15:01:04<6:18:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8677/12318 [15:01:04<6:18:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 130 max words, 50 samples - at ../dataset/gen-word-130-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8678/12318 [15:01:12<6:18:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 445 max words, 50 samples - at ../dataset/gen-word-445-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8678/12318 [15:01:12<6:18:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 470 max words - at ../dataset/shuffle-word-470-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8679/12318 [15:01:15<6:17:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8679/12318 [15:01:15<6:17:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 580 max words, 50 samples - at ../dataset/gen-word-580-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8680/12318 [15:01:22<6:17:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8680/12318 [15:01:22<6:17:47,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 610 max words - at ../dataset/shuffle-word-610-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8681/12318 [15:01:28<6:17:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8681/12318 [15:01:28<6:17:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 405 max words, 50 samples - at ../dataset/gen-word-405-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8682/12318 [15:01:34<6:17:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8682/12318 [15:01:34<6:17:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 490 max words - at ../dataset/shuffle-word-490-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8683/12318 [15:01:39<6:17:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8683/12318 [15:01:39<6:17:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 630 max words - at ../dataset/shuffle-word-630-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  70%|▋| 8684/12318 [15:01:45<6:17:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  70%|▋| 8684/12318 [15:01:45<6:17:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 755 max words, 50 samples - at ../dataset/gen-word-755-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8685/12318 [15:01:46<6:17:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8685/12318 [15:01:46<6:17:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 255 max words, 50 samples - at ../dataset/gen-word-255-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8686/12318 [15:01:51<6:17:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8686/12318 [15:01:51<6:17:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 745 max words - at ../dataset/shuffle-word-745-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8687/12318 [15:02:00<6:17:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8687/12318 [15:02:00<6:17:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 280 max words - at ../dataset/shuffle-word-280-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8688/12318 [15:02:01<6:16:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8688/12318 [15:02:01<6:16:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 690 max words - at ../dataset/shuffle-word-690-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8689/12318 [15:02:06<6:16:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8689/12318 [15:02:06<6:16:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 160 max words - at ../dataset/shuffle-word-160-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8690/12318 [15:02:10<6:16:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8690/12318 [15:02:10<6:16:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 740 max words, 50 samples - at ../dataset/gen-word-740-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8691/12318 [15:02:17<6:16:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8691/12318 [15:02:17<6:16:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 145 max words, 50 samples - at ../dataset/gen-word-145-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8692/12318 [15:02:20<6:16:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8692/12318 [15:02:20<6:16:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 360 max words - at ../dataset/shuffle-word-360-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8693/12318 [15:02:26<6:16:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8693/12318 [15:02:26<6:16:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 835 max words - at ../dataset/shuffle-word-835-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8694/12318 [15:02:31<6:16:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8694/12318 [15:02:31<6:16:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 210 max words, 50 samples - at ../dataset/gen-word-210-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8695/12318 [15:02:33<6:16:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 585 max words - at ../dataset/shuffle-word-585-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8695/12318 [15:02:33<6:16:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 39 samples (1 token repeat) - 65 max words - at ../dataset/shuffle-word-65-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8696/12318 [15:02:41<6:15:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8696/12318 [15:02:41<6:15:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 250 max words - at ../dataset/shuffle-word-250-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8697/12318 [15:02:44<6:15:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8697/12318 [15:02:44<6:15:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 450 max words, 50 samples - at ../dataset/gen-word-450-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8698/12318 [15:02:50<6:15:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8698/12318 [15:02:50<6:15:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 550 max words - at ../dataset/shuffle-word-550-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8699/12318 [15:02:59<6:15:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 455 max words, 50 samples - at ../dataset/gen-word-455-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8699/12318 [15:02:59<6:15:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 435 max words, 50 samples - at ../dataset/gen-word-435-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8700/12318 [15:03:00<6:15:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8700/12318 [15:03:00<6:15:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 760 max words - at ../dataset/shuffle-word-760-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8701/12318 [15:03:02<6:15:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8701/12318 [15:03:02<6:15:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 165 max words, 50 samples - at ../dataset/gen-word-165-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8702/12318 [15:03:06<6:15:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8702/12318 [15:03:06<6:15:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 170 max words - at ../dataset/shuffle-word-170-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8703/12318 [15:03:08<6:15:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8703/12318 [15:03:08<6:15:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 180 max words - at ../dataset/shuffle-word-180-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8704/12318 [15:03:55<6:15:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8704/12318 [15:03:55<6:15:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 75 max words, 50 samples - at ../dataset/gen-word-75-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8705/12318 [15:04:01<6:15:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8705/12318 [15:04:01<6:15:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 28 samples (1 token repeat) - 100 max words - at ../dataset/shuffle-word-100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8706/12318 [15:04:06<6:15:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8706/12318 [15:04:06<6:15:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 39 samples (1 token repeat) - 70 max words - at ../dataset/shuffle-word-70-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8707/12318 [15:04:12<6:14:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8707/12318 [15:04:12<6:14:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 550 max words, 50 samples - at ../dataset/gen-word-550-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8708/12318 [15:04:16<6:14:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8708/12318 [15:04:16<6:14:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 955 max words - at ../dataset/shuffle-word-955-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8709/12318 [15:04:23<6:14:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8709/12318 [15:04:23<6:14:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 355 max words, 50 samples - at ../dataset/gen-word-355-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8710/12318 [15:04:26<6:14:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8710/12318 [15:04:26<6:14:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 295 max words - at ../dataset/shuffle-word-295-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8711/12318 [15:04:28<6:14:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8711/12318 [15:04:28<6:14:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 830 max words - at ../dataset/shuffle-word-830-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8712/12318 [15:04:30<6:14:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8712/12318 [15:04:30<6:14:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 770 max words - at ../dataset/shuffle-word-770-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8713/12318 [15:04:34<6:14:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8713/12318 [15:04:34<6:14:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 285 max words - at ../dataset/shuffle-word-285-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8714/12318 [15:04:38<6:14:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8714/12318 [15:04:38<6:14:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 470 max words, 50 samples - at ../dataset/gen-word-470-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8715/12318 [15:04:46<6:14:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8715/12318 [15:04:46<6:14:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 345 max words, 50 samples - at ../dataset/gen-word-345-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8716/12318 [15:04:50<6:13:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8716/12318 [15:04:50<6:13:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 355 max words - at ../dataset/shuffle-word-355-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8717/12318 [15:04:55<6:13:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8717/12318 [15:04:55<6:13:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 410 max words, 50 samples - at ../dataset/gen-word-410-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8718/12318 [15:05:03<6:13:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8718/12318 [15:05:03<6:13:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 535 max words - at ../dataset/shuffle-word-535-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8719/12318 [15:05:12<6:13:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8719/12318 [15:05:12<6:13:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 165 max words - at ../dataset/shuffle-word-165-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8720/12318 [15:05:17<6:13:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8720/12318 [15:05:17<6:13:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 995 max words, 50 samples - at ../dataset/gen-word-995-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8721/12318 [15:05:20<6:13:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8721/12318 [15:05:20<6:13:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 60 samples (20 token repeat) - 1100 max words - at ../dataset/shuffle-word-1100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8722/12318 [15:05:28<6:13:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8722/12318 [15:05:28<6:13:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 225 max words, 50 samples - at ../dataset/gen-word-225-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8723/12318 [15:05:32<6:13:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8723/12318 [15:05:32<6:13:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 245 max words, 50 samples - at ../dataset/gen-word-245-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8724/12318 [15:05:41<6:13:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8724/12318 [15:05:41<6:13:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 190 max words - at ../dataset/shuffle-word-190-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8725/12318 [15:05:44<6:12:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8725/12318 [15:05:44<6:12:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 13 samples (1 token repeat) - 200 max words - at ../dataset/shuffle-word-200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8726/12318 [15:05:46<6:12:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8726/12318 [15:05:46<6:12:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 310 max words - at ../dataset/shuffle-word-310-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8727/12318 [15:05:48<6:12:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8727/12318 [15:05:48<6:12:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 230 max words, 50 samples - at ../dataset/gen-word-230-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8728/12318 [15:05:54<6:12:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8728/12318 [15:05:54<6:12:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 59 samples (20 token repeat) - 1200 max words - at ../dataset/shuffle-word-1200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8729/12318 [15:06:00<6:12:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8729/12318 [15:06:00<6:12:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 390 max words, 50 samples - at ../dataset/gen-word-390-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8730/12318 [15:06:01<6:12:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8730/12318 [15:06:01<6:12:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 500 max words - at ../dataset/shuffle-word-500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8731/12318 [15:06:10<6:12:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8731/12318 [15:06:10<6:12:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 685 max words, 50 samples - at ../dataset/gen-word-685-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8732/12318 [15:06:15<6:12:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8732/12318 [15:06:15<6:12:10,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 635 max words, 50 samples - at ../dataset/gen-word-635-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8733/12318 [15:06:20<6:12:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8733/12318 [15:06:20<6:12:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 545 max words, 50 samples - at ../dataset/gen-word-545-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8734/12318 [15:06:24<6:11:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8734/12318 [15:06:24<6:11:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4200 max words - at ../dataset/shuffle-word-4200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8735/12318 [15:06:29<6:11:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8735/12318 [15:06:29<6:11:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 715 max words - at ../dataset/shuffle-word-715-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8736/12318 [15:07:14<6:11:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8736/12318 [15:07:14<6:11:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 625 max words, 50 samples - at ../dataset/gen-word-625-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8737/12318 [15:07:20<6:11:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8737/12318 [15:07:20<6:11:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 795 max words - at ../dataset/shuffle-word-795-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8738/12318 [15:07:25<6:11:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8738/12318 [15:07:25<6:11:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 705 max words - at ../dataset/shuffle-word-705-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8739/12318 [15:07:29<6:11:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8739/12318 [15:07:29<6:11:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 33 samples (1 token repeat) - 80 max words - at ../dataset/shuffle-word-80-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8740/12318 [15:07:36<6:11:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8740/12318 [15:07:36<6:11:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 905 max words, 50 samples - at ../dataset/gen-word-905-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8741/12318 [15:07:43<6:11:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8741/12318 [15:07:43<6:11:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 780 max words - at ../dataset/shuffle-word-780-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8742/12318 [15:07:45<6:11:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8742/12318 [15:07:45<6:11:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 520 max words - at ../dataset/shuffle-word-520-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8743/12318 [15:07:51<6:11:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8743/12318 [15:07:51<6:11:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 375 max words, 50 samples - at ../dataset/gen-word-375-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8744/12318 [15:07:59<6:11:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8744/12318 [15:07:59<6:11:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 940 max words - at ../dataset/shuffle-word-940-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8745/12318 [15:08:06<6:11:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8745/12318 [15:08:06<6:11:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 150 max words, 50 samples - at ../dataset/gen-word-150-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8746/12318 [15:08:12<6:10:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8746/12318 [15:08:12<6:10:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 345 max words - at ../dataset/shuffle-word-345-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8747/12318 [15:08:21<6:10:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8747/12318 [15:08:21<6:10:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 275 max words, 50 samples - at ../dataset/gen-word-275-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8748/12318 [15:08:29<6:10:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8748/12318 [15:08:29<6:10:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 425 max words - at ../dataset/shuffle-word-425-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8749/12318 [15:08:32<6:10:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8749/12318 [15:08:32<6:10:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 945 max words - at ../dataset/shuffle-word-945-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8750/12318 [15:08:33<6:10:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8750/12318 [15:08:33<6:10:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 815 max words - at ../dataset/shuffle-word-815-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8751/12318 [15:08:37<6:10:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8751/12318 [15:08:37<6:10:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 960 max words - at ../dataset/shuffle-word-960-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8752/12318 [15:08:41<6:10:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8752/12318 [15:08:41<6:10:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 195 max words - at ../dataset/shuffle-word-195-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8753/12318 [15:08:46<6:10:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8753/12318 [15:08:46<6:10:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 825 max words - at ../dataset/shuffle-word-825-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8754/12318 [15:08:51<6:10:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8754/12318 [15:08:51<6:10:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 980 max words - at ../dataset/shuffle-word-980-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8755/12318 [15:08:54<6:09:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8755/12318 [15:08:54<6:09:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 335 max words - at ../dataset/shuffle-word-335-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8756/12318 [15:08:55<6:09:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8756/12318 [15:08:55<6:09:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 830 max words, 50 samples - at ../dataset/gen-word-830-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8757/12318 [15:08:57<6:09:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8757/12318 [15:08:57<6:09:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 810 max words - at ../dataset/shuffle-word-810-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8758/12318 [15:08:58<6:09:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8758/12318 [15:08:58<6:09:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 820 max words - at ../dataset/shuffle-word-820-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8759/12318 [15:08:59<6:09:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8759/12318 [15:08:59<6:09:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 990 max words - at ../dataset/shuffle-word-990-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8760/12318 [15:09:03<6:09:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8760/12318 [15:09:03<6:09:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 725 max words, 50 samples - at ../dataset/gen-word-725-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8761/12318 [15:09:08<6:09:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8761/12318 [15:09:08<6:09:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 975 max words - at ../dataset/shuffle-word-975-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8762/12318 [15:09:15<6:09:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8762/12318 [15:09:15<6:09:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 315 max words - at ../dataset/shuffle-word-315-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8763/12318 [15:09:18<6:08:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8763/12318 [15:09:18<6:08:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 680 max words - at ../dataset/shuffle-word-680-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8764/12318 [15:09:23<6:08:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8764/12318 [15:09:23<6:08:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5400 max words - at ../dataset/shuffle-word-5400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8765/12318 [15:09:32<6:08:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8765/12318 [15:09:32<6:08:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 525 max words, 50 samples - at ../dataset/gen-word-525-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8766/12318 [15:09:36<6:08:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8766/12318 [15:09:36<6:08:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7300 max words - at ../dataset/shuffle-word-7300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8767/12318 [15:09:41<6:08:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 170 max words, 50 samples - at ../dataset/gen-word-170-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8767/12318 [15:09:41<6:08:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 250 max words, 50 samples - at ../dataset/gen-word-250-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8768/12318 [15:10:33<6:08:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8768/12318 [15:10:33<6:08:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 380 max words, 50 samples - at ../dataset/gen-word-380-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8769/12318 [15:10:36<6:08:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8769/12318 [15:10:36<6:08:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 740 max words - at ../dataset/shuffle-word-740-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8770/12318 [15:10:45<6:08:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8770/12318 [15:10:45<6:08:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 725 max words - at ../dataset/shuffle-word-725-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8771/12318 [15:10:50<6:08:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8771/12318 [15:10:50<6:08:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 715 max words, 50 samples - at ../dataset/gen-word-715-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8772/12318 [15:10:55<6:08:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8772/12318 [15:10:55<6:08:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 760 max words, 50 samples - at ../dataset/gen-word-760-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8773/12318 [15:10:56<6:08:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8773/12318 [15:10:56<6:08:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 510 max words - at ../dataset/shuffle-word-510-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8774/12318 [15:11:04<6:08:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8774/12318 [15:11:04<6:08:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 375 max words - at ../dataset/shuffle-word-375-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8775/12318 [15:11:11<6:07:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8775/12318 [15:11:11<6:07:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 860 max words - at ../dataset/shuffle-word-860-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8776/12318 [15:11:16<6:07:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8776/12318 [15:11:16<6:07:47,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 330 max words - at ../dataset/shuffle-word-330-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8777/12318 [15:11:19<6:07:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8777/12318 [15:11:19<6:07:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 755 max words - at ../dataset/shuffle-word-755-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8778/12318 [15:11:24<6:07:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8778/12318 [15:11:24<6:07:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 905 max words - at ../dataset/shuffle-word-905-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8779/12318 [15:11:32<6:07:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8779/12318 [15:11:32<6:07:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 915 max words, 50 samples - at ../dataset/gen-word-915-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8780/12318 [15:11:35<6:07:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8780/12318 [15:11:35<6:07:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 660 max words - at ../dataset/shuffle-word-660-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8781/12318 [15:11:39<6:07:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8781/12318 [15:11:39<6:07:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4000 max words - at ../dataset/shuffle-word-4000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8782/12318 [15:11:40<6:07:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8782/12318 [15:11:40<6:07:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 315 max words, 50 samples - at ../dataset/gen-word-315-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8783/12318 [15:11:44<6:06:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8783/12318 [15:11:44<6:06:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 880 max words, 50 samples - at ../dataset/gen-word-880-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8784/12318 [15:11:47<6:06:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8784/12318 [15:11:47<6:06:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 195 max words, 50 samples - at ../dataset/gen-word-195-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8785/12318 [15:11:56<6:06:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8785/12318 [15:11:56<6:06:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 625 max words - at ../dataset/shuffle-word-625-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8786/12318 [15:12:02<6:06:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8786/12318 [15:12:02<6:06:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2100 max words - at ../dataset/shuffle-word-2100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8787/12318 [15:12:07<6:06:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8787/12318 [15:12:07<6:06:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 325 max words, 50 samples - at ../dataset/gen-word-325-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8788/12318 [15:12:11<6:06:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8788/12318 [15:12:11<6:06:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 735 max words - at ../dataset/shuffle-word-735-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8789/12318 [15:12:19<6:06:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8789/12318 [15:12:19<6:06:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 795 max words, 50 samples - at ../dataset/gen-word-795-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8790/12318 [15:12:24<6:06:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8790/12318 [15:12:24<6:06:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 890 max words, 50 samples - at ../dataset/gen-word-890-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8791/12318 [15:12:33<6:06:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 685 max words - at ../dataset/shuffle-word-685-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8791/12318 [15:12:33<6:06:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 620 max words - at ../dataset/shuffle-word-620-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8792/12318 [15:12:36<6:05:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8792/12318 [15:12:36<6:05:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 785 max words - at ../dataset/shuffle-word-785-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8793/12318 [15:12:37<6:05:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8793/12318 [15:12:37<6:05:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 275 max words - at ../dataset/shuffle-word-275-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8794/12318 [15:12:41<6:05:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8794/12318 [15:12:41<6:05:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 655 max words - at ../dataset/shuffle-word-655-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8795/12318 [15:12:48<6:05:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8795/12318 [15:12:48<6:05:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 340 max words, 50 samples - at ../dataset/gen-word-340-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8796/12318 [15:12:51<6:05:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8796/12318 [15:12:51<6:05:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 920 max words - at ../dataset/shuffle-word-920-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8797/12318 [15:12:59<6:05:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8797/12318 [15:12:59<6:05:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7100 max words - at ../dataset/shuffle-word-7100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8798/12318 [15:13:00<6:05:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8798/12318 [15:13:00<6:05:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 655 max words, 50 samples - at ../dataset/gen-word-655-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8799/12318 [15:13:06<6:05:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8799/12318 [15:13:06<6:05:10,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 720 max words - at ../dataset/shuffle-word-720-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8800/12318 [15:13:51<6:05:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8800/12318 [15:13:51<6:05:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 970 max words - at ../dataset/shuffle-word-970-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8801/12318 [15:14:18<6:05:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8801/12318 [15:14:18<6:05:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 845 max words, 50 samples - at ../dataset/gen-word-845-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8802/12318 [15:14:27<6:05:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8802/12318 [15:14:27<6:05:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3000 max words - at ../dataset/shuffle-word-3000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8803/12318 [15:14:30<6:05:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8803/12318 [15:14:30<6:05:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 430 max words - at ../dataset/shuffle-word-430-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8804/12318 [15:14:32<6:05:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8804/12318 [15:14:32<6:05:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 175 max words - at ../dataset/shuffle-word-175-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8805/12318 [15:14:36<6:04:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8805/12318 [15:14:36<6:04:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 790 max words - at ../dataset/shuffle-word-790-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8806/12318 [15:14:42<6:04:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8806/12318 [15:14:42<6:04:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 90 max words, 50 samples - at ../dataset/gen-word-90-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  71%|▋| 8807/12318 [15:14:49<6:04:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  71%|▋| 8807/12318 [15:14:49<6:04:42,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 895 max words - at ../dataset/shuffle-word-895-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8808/12318 [15:14:56<6:04:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8808/12318 [15:14:56<6:04:36,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 37 samples (20 token repeat) - 2500 max words - at ../dataset/shuffle-word-2500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8809/12318 [15:15:00<6:04:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8809/12318 [15:15:00<6:04:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 865 max words - at ../dataset/shuffle-word-865-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8810/12318 [15:15:09<6:04:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8810/12318 [15:15:09<6:04:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 51 samples (20 token repeat) - 1300 max words - at ../dataset/shuffle-word-1300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8811/12318 [15:15:14<6:04:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8811/12318 [15:15:14<6:04:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 660 max words, 50 samples - at ../dataset/gen-word-660-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8812/12318 [15:15:20<6:04:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8812/12318 [15:15:20<6:04:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 220 max words, 50 samples - at ../dataset/gen-word-220-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8813/12318 [15:15:22<6:04:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8813/12318 [15:15:22<6:04:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 485 max words - at ../dataset/shuffle-word-485-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8814/12318 [15:15:24<6:03:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8814/12318 [15:15:24<6:03:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 695 max words, 50 samples - at ../dataset/gen-word-695-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8815/12318 [15:15:32<6:03:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8815/12318 [15:15:32<6:03:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 840 max words, 50 samples - at ../dataset/gen-word-840-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8816/12318 [15:15:39<6:03:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8816/12318 [15:15:39<6:03:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 745 max words, 50 samples - at ../dataset/gen-word-745-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8817/12318 [15:15:44<6:03:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8817/12318 [15:15:44<6:03:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 600 max words, 50 samples - at ../dataset/gen-word-600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8818/12318 [15:15:48<6:03:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8818/12318 [15:15:48<6:03:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 790 max words, 50 samples - at ../dataset/gen-word-790-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8819/12318 [15:15:51<6:03:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8819/12318 [15:15:51<6:03:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 540 max words, 50 samples - at ../dataset/gen-word-540-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8820/12318 [15:15:59<6:03:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8820/12318 [15:15:59<6:03:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 420 max words, 50 samples - at ../dataset/gen-word-420-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8821/12318 [15:16:07<6:03:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8821/12318 [15:16:07<6:03:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 825 max words, 50 samples - at ../dataset/gen-word-825-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8822/12318 [15:16:11<6:03:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8822/12318 [15:16:11<6:03:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 805 max words, 50 samples - at ../dataset/gen-word-805-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8823/12318 [15:16:17<6:02:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8823/12318 [15:16:17<6:02:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 765 max words, 50 samples - at ../dataset/gen-word-765-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8824/12318 [15:16:25<6:02:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8824/12318 [15:16:25<6:02:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 855 max words - at ../dataset/shuffle-word-855-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8825/12318 [15:16:28<6:02:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8825/12318 [15:16:28<6:02:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 965 max words, 50 samples - at ../dataset/gen-word-965-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8826/12318 [15:16:30<6:02:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8826/12318 [15:16:30<6:02:36,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5900 max words - at ../dataset/shuffle-word-5900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8827/12318 [15:16:34<6:02:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8827/12318 [15:16:34<6:02:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1700 max words - at ../dataset/shuffle-word-1700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8828/12318 [15:16:38<6:02:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8828/12318 [15:16:38<6:02:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 705 max words, 50 samples - at ../dataset/gen-word-705-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8829/12318 [15:16:43<6:02:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8829/12318 [15:16:43<6:02:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7900 max words - at ../dataset/shuffle-word-7900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8830/12318 [15:16:46<6:02:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8830/12318 [15:16:46<6:02:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2300 max words - at ../dataset/shuffle-word-2300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8831/12318 [15:16:53<6:02:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8831/12318 [15:16:53<6:02:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 385 max words, 50 samples - at ../dataset/gen-word-385-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8832/12318 [15:17:20<6:02:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8832/12318 [15:17:20<6:02:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 990 max words, 50 samples - at ../dataset/gen-word-990-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8833/12318 [15:17:28<6:01:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8833/12318 [15:17:28<6:01:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 515 max words, 50 samples - at ../dataset/gen-word-515-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8834/12318 [15:17:35<6:01:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8834/12318 [15:17:35<6:01:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 640 max words, 50 samples - at ../dataset/gen-word-640-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8835/12318 [15:17:40<6:01:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8835/12318 [15:17:40<6:01:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 730 max words, 50 samples - at ../dataset/gen-word-730-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8836/12318 [15:17:45<6:01:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8836/12318 [15:17:45<6:01:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 930 max words, 50 samples - at ../dataset/gen-word-930-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8837/12318 [15:17:50<6:01:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4600 max words - at ../dataset/shuffle-word-4600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8837/12318 [15:17:50<6:01:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 590 max words, 50 samples - at ../dataset/gen-word-590-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8838/12318 [15:17:56<6:01:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8838/12318 [15:17:56<6:01:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 595 max words, 50 samples - at ../dataset/gen-word-595-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8839/12318 [15:18:05<6:01:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8839/12318 [15:18:05<6:01:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3300 max words - at ../dataset/shuffle-word-3300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8840/12318 [15:18:11<6:01:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8840/12318 [15:18:11<6:01:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 820 max words, 50 samples - at ../dataset/gen-word-820-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8841/12318 [15:18:18<6:01:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8841/12318 [15:18:18<6:01:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 320 max words, 50 samples - at ../dataset/gen-word-320-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8842/12318 [15:18:24<6:01:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8842/12318 [15:18:24<6:01:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5600 max words - at ../dataset/shuffle-word-5600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8843/12318 [15:18:33<6:00:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8843/12318 [15:18:33<6:00:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 750 max words - at ../dataset/shuffle-word-750-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8844/12318 [15:18:40<6:00:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8844/12318 [15:18:40<6:00:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5300 max words - at ../dataset/shuffle-word-5300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8845/12318 [15:18:43<6:00:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8845/12318 [15:18:43<6:00:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 300 max words - at ../dataset/shuffle-word-300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8846/12318 [15:18:51<6:00:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8846/12318 [15:18:51<6:00:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 325 max words - at ../dataset/shuffle-word-325-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8847/12318 [15:18:56<6:00:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 670 max words, 50 samples - at ../dataset/gen-word-670-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8847/12318 [15:18:56<6:00:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 730 max words - at ../dataset/shuffle-word-730-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8848/12318 [15:19:02<6:00:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8848/12318 [15:19:02<6:00:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4100 max words - at ../dataset/shuffle-word-4100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8849/12318 [15:19:08<6:00:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8849/12318 [15:19:08<6:00:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 11 samples (1 token repeat) - 205 max words - at ../dataset/shuffle-word-205-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8850/12318 [15:19:15<6:00:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8850/12318 [15:19:15<6:00:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 980 max words, 50 samples - at ../dataset/gen-word-980-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8851/12318 [15:19:18<6:00:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8851/12318 [15:19:18<6:00:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 935 max words, 50 samples - at ../dataset/gen-word-935-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8852/12318 [15:19:21<5:59:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8852/12318 [15:19:21<5:59:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1000 max words, 50 samples - at ../dataset/gen-word-1000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8853/12318 [15:19:24<5:59:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8853/12318 [15:19:24<5:59:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1900 max words - at ../dataset/shuffle-word-1900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8854/12318 [15:19:33<5:59:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8854/12318 [15:19:33<5:59:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 895 max words, 50 samples - at ../dataset/gen-word-895-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8855/12318 [15:19:38<5:59:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8855/12318 [15:19:38<5:59:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 870 max words - at ../dataset/shuffle-word-870-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8856/12318 [15:19:45<5:59:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8856/12318 [15:19:45<5:59:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 750 max words, 50 samples - at ../dataset/gen-word-750-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8857/12318 [15:19:48<5:59:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8857/12318 [15:19:48<5:59:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 245 max words - at ../dataset/shuffle-word-245-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8858/12318 [15:19:55<5:59:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8858/12318 [15:19:55<5:59:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 270 max words - at ../dataset/shuffle-word-270-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8859/12318 [15:20:03<5:59:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8859/12318 [15:20:03<5:59:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 380 max words - at ../dataset/shuffle-word-380-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8860/12318 [15:20:12<5:59:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8860/12318 [15:20:12<5:59:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 405 max words - at ../dataset/shuffle-word-405-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8861/12318 [15:20:17<5:59:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8861/12318 [15:20:17<5:59:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 420 max words - at ../dataset/shuffle-word-420-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8862/12318 [15:20:22<5:58:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8862/12318 [15:20:22<5:58:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 300 max words, 50 samples - at ../dataset/gen-word-300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8863/12318 [15:20:25<5:58:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 910 max words, 50 samples - at ../dataset/gen-word-910-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8863/12318 [15:20:25<5:58:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 735 max words, 50 samples - at ../dataset/gen-word-735-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8864/12318 [15:20:47<5:58:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8864/12318 [15:20:47<5:58:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2000 max words - at ../dataset/shuffle-word-2000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8865/12318 [15:20:51<5:58:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8865/12318 [15:20:51<5:58:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 39 samples (20 token repeat) - 2400 max words - at ../dataset/shuffle-word-2400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8866/12318 [15:20:54<5:58:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8866/12318 [15:20:54<5:58:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 8000 max words - at ../dataset/shuffle-word-8000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8867/12318 [15:21:03<5:58:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8867/12318 [15:21:03<5:58:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 645 max words - at ../dataset/shuffle-word-645-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8868/12318 [15:21:09<5:58:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8868/12318 [15:21:09<5:58:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 630 max words, 50 samples - at ../dataset/gen-word-630-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8869/12318 [15:21:13<5:58:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8869/12318 [15:21:13<5:58:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7400 max words - at ../dataset/shuffle-word-7400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8870/12318 [15:21:20<5:58:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8870/12318 [15:21:20<5:58:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 305 max words, 50 samples - at ../dataset/gen-word-305-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8871/12318 [15:21:25<5:58:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8871/12318 [15:21:25<5:58:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 455 max words - at ../dataset/shuffle-word-455-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8872/12318 [15:21:32<5:57:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8872/12318 [15:21:32<5:57:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 860 max words, 50 samples - at ../dataset/gen-word-860-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8873/12318 [15:21:37<5:57:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8873/12318 [15:21:37<5:57:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7700 max words - at ../dataset/shuffle-word-7700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8874/12318 [15:21:42<5:57:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8874/12318 [15:21:42<5:57:42,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 185 max words, 50 samples - at ../dataset/gen-word-185-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8875/12318 [15:21:45<5:57:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8875/12318 [15:21:45<5:57:35,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 260 max words, 50 samples - at ../dataset/gen-word-260-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8876/12318 [15:21:53<5:57:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8876/12318 [15:21:53<5:57:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 920 max words, 50 samples - at ../dataset/gen-word-920-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8877/12318 [15:21:59<5:57:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8877/12318 [15:21:59<5:57:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 665 max words, 50 samples - at ../dataset/gen-word-665-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8878/12318 [15:22:01<5:57:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8878/12318 [15:22:01<5:57:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 565 max words, 50 samples - at ../dataset/gen-word-565-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8879/12318 [15:22:07<5:57:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8879/12318 [15:22:07<5:57:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 215 max words, 50 samples - at ../dataset/gen-word-215-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8880/12318 [15:22:16<5:57:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8880/12318 [15:22:16<5:57:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 540 max words - at ../dataset/shuffle-word-540-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8881/12318 [15:22:19<5:56:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8881/12318 [15:22:19<5:56:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 800 max words, 50 samples - at ../dataset/gen-word-800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8882/12318 [15:22:27<5:56:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 190 max words, 50 samples - at ../dataset/gen-word-190-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8882/12318 [15:22:27<5:56:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 560 max words - at ../dataset/shuffle-word-560-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8883/12318 [15:22:35<5:56:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8883/12318 [15:22:35<5:56:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4700 max words - at ../dataset/shuffle-word-4700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8884/12318 [15:22:40<5:56:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8884/12318 [15:22:41<5:56:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 495 max words, 50 samples - at ../dataset/gen-word-495-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8885/12318 [15:22:46<5:56:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8885/12318 [15:22:46<5:56:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 360 max words, 50 samples - at ../dataset/gen-word-360-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8886/12318 [15:22:55<5:56:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8886/12318 [15:22:55<5:56:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 950 max words, 50 samples - at ../dataset/gen-word-950-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8887/12318 [15:23:02<5:56:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8887/12318 [15:23:02<5:56:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 615 max words - at ../dataset/shuffle-word-615-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8888/12318 [15:23:04<5:56:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8888/12318 [15:23:04<5:56:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6300 max words - at ../dataset/shuffle-word-6300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8889/12318 [15:23:10<5:56:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8889/12318 [15:23:10<5:56:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 845 max words - at ../dataset/shuffle-word-845-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8890/12318 [15:23:19<5:56:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8890/12318 [15:23:19<5:56:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 505 max words, 50 samples - at ../dataset/gen-word-505-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8891/12318 [15:23:21<5:55:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8891/12318 [15:23:21<5:55:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3900 max words - at ../dataset/shuffle-word-3900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8892/12318 [15:23:25<5:55:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8892/12318 [15:23:25<5:55:47,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 670 max words - at ../dataset/shuffle-word-670-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8893/12318 [15:23:27<5:55:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8893/12318 [15:23:27<5:55:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6400 max words - at ../dataset/shuffle-word-6400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8894/12318 [15:23:35<5:55:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8894/12318 [15:23:35<5:55:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 815 max words, 50 samples - at ../dataset/gen-word-815-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8895/12318 [15:23:37<5:55:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8895/12318 [15:23:37<5:55:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 440 max words - at ../dataset/shuffle-word-440-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8896/12318 [15:24:07<5:55:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1500 max words - at ../dataset/shuffle-word-1500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8896/12318 [15:24:07<5:55:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 925 max words - at ../dataset/shuffle-word-925-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8897/12318 [15:24:15<5:55:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8897/12318 [15:24:15<5:55:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 465 max words - at ../dataset/shuffle-word-465-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8898/12318 [15:24:17<5:55:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8898/12318 [15:24:17<5:55:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 690 max words, 50 samples - at ../dataset/gen-word-690-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8899/12318 [15:24:21<5:55:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8899/12318 [15:24:21<5:55:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 395 max words, 50 samples - at ../dataset/gen-word-395-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8900/12318 [15:24:29<5:55:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8900/12318 [15:24:29<5:55:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 265 max words, 50 samples - at ../dataset/gen-word-265-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8901/12318 [15:24:33<5:54:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8901/12318 [15:24:33<5:54:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 955 max words, 50 samples - at ../dataset/gen-word-955-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8902/12318 [15:24:38<5:54:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8902/12318 [15:24:38<5:54:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 885 max words, 50 samples - at ../dataset/gen-word-885-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8903/12318 [15:24:42<5:54:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8903/12318 [15:24:42<5:54:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 14 samples (1 token repeat) - 185 max words - at ../dataset/shuffle-word-185-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8904/12318 [15:24:45<5:54:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8904/12318 [15:24:45<5:54:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 240 max words, 50 samples - at ../dataset/gen-word-240-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8905/12318 [15:24:49<5:54:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8905/12318 [15:24:49<5:54:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6600 max words - at ../dataset/shuffle-word-6600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8906/12318 [15:24:53<5:54:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8906/12318 [15:24:53<5:54:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 440 max words, 50 samples - at ../dataset/gen-word-440-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8907/12318 [15:25:01<5:54:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8907/12318 [15:25:01<5:54:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 135 max words, 50 samples - at ../dataset/gen-word-135-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8908/12318 [15:25:05<5:54:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8908/12318 [15:25:05<5:54:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 555 max words, 50 samples - at ../dataset/gen-word-555-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8909/12318 [15:25:12<5:54:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8909/12318 [15:25:12<5:54:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 605 max words, 50 samples - at ../dataset/gen-word-605-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8910/12318 [15:25:17<5:53:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8910/12318 [15:25:17<5:53:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 465 max words, 50 samples - at ../dataset/gen-word-465-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8911/12318 [15:25:26<5:53:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8911/12318 [15:25:26<5:53:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 295 max words, 50 samples - at ../dataset/gen-word-295-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8912/12318 [15:25:35<5:53:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8912/12318 [15:25:35<5:53:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 390 max words - at ../dataset/shuffle-word-390-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8913/12318 [15:25:38<5:53:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8913/12318 [15:25:38<5:53:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 900 max words, 50 samples - at ../dataset/gen-word-900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8914/12318 [15:25:43<5:53:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8914/12318 [15:25:43<5:53:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 975 max words, 50 samples - at ../dataset/gen-word-975-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8915/12318 [15:25:46<5:53:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8915/12318 [15:25:46<5:53:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 945 max words, 50 samples - at ../dataset/gen-word-945-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8916/12318 [15:25:51<5:53:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8916/12318 [15:25:51<5:53:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 480 max words - at ../dataset/shuffle-word-480-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8917/12318 [15:26:00<5:53:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8917/12318 [15:26:00<5:53:10,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4900 max words - at ../dataset/shuffle-word-4900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8918/12318 [15:26:02<5:53:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8918/12318 [15:26:02<5:53:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5800 max words - at ../dataset/shuffle-word-5800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8919/12318 [15:26:07<5:52:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8919/12318 [15:26:07<5:52:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 970 max words, 50 samples - at ../dataset/gen-word-970-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8920/12318 [15:26:14<5:52:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8920/12318 [15:26:14<5:52:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 605 max words - at ../dataset/shuffle-word-605-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8921/12318 [15:26:17<5:52:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8921/12318 [15:26:17<5:52:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 650 max words, 50 samples - at ../dataset/gen-word-650-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8922/12318 [15:26:21<5:52:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8922/12318 [15:26:21<5:52:35,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 985 max words - at ../dataset/shuffle-word-985-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8923/12318 [15:26:26<5:52:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8923/12318 [15:26:26<5:52:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3800 max words - at ../dataset/shuffle-word-3800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8924/12318 [15:26:30<5:52:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8924/12318 [15:26:30<5:52:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 235 max words - at ../dataset/shuffle-word-235-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8925/12318 [15:26:39<5:52:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8925/12318 [15:26:39<5:52:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7000 max words - at ../dataset/shuffle-word-7000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8926/12318 [15:26:43<5:52:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8926/12318 [15:26:43<5:52:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 570 max words - at ../dataset/shuffle-word-570-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8927/12318 [15:26:48<5:52:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8927/12318 [15:26:48<5:52:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 260 max words - at ../dataset/shuffle-word-260-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8928/12318 [15:27:16<5:52:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8928/12318 [15:27:16<5:52:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 280 max words, 50 samples - at ../dataset/gen-word-280-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8929/12318 [15:27:18<5:51:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8929/12318 [15:27:18<5:51:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 530 max words, 50 samples - at ../dataset/gen-word-530-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  72%|▋| 8930/12318 [15:27:22<5:51:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  72%|▋| 8930/12318 [15:27:22<5:51:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 2900 max words - at ../dataset/shuffle-word-2900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8931/12318 [15:27:29<5:51:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8931/12318 [15:27:29<5:51:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 775 max words - at ../dataset/shuffle-word-775-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8932/12318 [15:27:36<5:51:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8932/12318 [15:27:36<5:51:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 610 max words, 50 samples - at ../dataset/gen-word-610-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8933/12318 [15:27:45<5:51:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8933/12318 [15:27:45<5:51:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 675 max words, 50 samples - at ../dataset/gen-word-675-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8934/12318 [15:27:53<5:51:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8934/12318 [15:27:53<5:51:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 350 max words, 50 samples - at ../dataset/gen-word-350-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8935/12318 [15:27:54<5:51:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8935/12318 [15:27:54<5:51:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 710 max words, 50 samples - at ../dataset/gen-word-710-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8936/12318 [15:28:03<5:51:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8936/12318 [15:28:03<5:51:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 15 samples (1 token repeat) - 150 max words - at ../dataset/shuffle-word-150-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8937/12318 [15:28:08<5:51:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8937/12318 [15:28:08<5:51:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 475 max words, 50 samples - at ../dataset/gen-word-475-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8938/12318 [15:28:11<5:51:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8938/12318 [15:28:11<5:51:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 575 max words, 50 samples - at ../dataset/gen-word-575-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8939/12318 [15:28:19<5:50:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8939/12318 [15:28:19<5:50:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 30 samples (1 token repeat) - 90 max words - at ../dataset/shuffle-word-90-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8940/12318 [15:28:23<5:50:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8940/12318 [15:28:23<5:50:47,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 575 max words - at ../dataset/shuffle-word-575-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8941/12318 [15:28:28<5:50:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8941/12318 [15:28:28<5:50:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 805 max words - at ../dataset/shuffle-word-805-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8942/12318 [15:28:31<5:50:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8942/12318 [15:28:31<5:50:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 320 max words - at ../dataset/shuffle-word-320-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8943/12318 [15:28:37<5:50:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8943/12318 [15:28:37<5:50:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 665 max words - at ../dataset/shuffle-word-665-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8944/12318 [15:28:45<5:50:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8944/12318 [15:28:45<5:50:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 960 max words, 50 samples - at ../dataset/gen-word-960-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8945/12318 [15:28:48<5:50:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8945/12318 [15:28:48<5:50:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 490 max words, 50 samples - at ../dataset/gen-word-490-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8946/12318 [15:28:52<5:50:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8946/12318 [15:28:52<5:50:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 810 max words, 50 samples - at ../dataset/gen-word-810-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8947/12318 [15:28:56<5:49:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8947/12318 [15:28:56<5:49:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 695 max words - at ../dataset/shuffle-word-695-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8948/12318 [15:29:03<5:49:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8948/12318 [15:29:03<5:49:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 995 max words - at ../dataset/shuffle-word-995-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8949/12318 [15:29:11<5:49:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8949/12318 [15:29:11<5:49:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 770 max words, 50 samples - at ../dataset/gen-word-770-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8950/12318 [15:29:18<5:49:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8950/12318 [15:29:18<5:49:42,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 875 max words - at ../dataset/shuffle-word-875-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8951/12318 [15:29:22<5:49:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8951/12318 [15:29:22<5:49:35,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 395 max words - at ../dataset/shuffle-word-395-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8952/12318 [15:29:31<5:49:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8952/12318 [15:29:31<5:49:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 41 samples (20 token repeat) - 1400 max words - at ../dataset/shuffle-word-1400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8953/12318 [15:29:39<5:49:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8953/12318 [15:29:39<5:49:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 460 max words, 50 samples - at ../dataset/gen-word-460-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8954/12318 [15:29:48<5:49:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8954/12318 [15:29:48<5:49:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7600 max words - at ../dataset/shuffle-word-7600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8955/12318 [15:29:56<5:49:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8955/12318 [15:29:56<5:49:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7200 max words - at ../dataset/shuffle-word-7200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8956/12318 [15:30:01<5:49:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8956/12318 [15:30:01<5:49:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4800 max words - at ../dataset/shuffle-word-4800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8957/12318 [15:30:02<5:48:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8957/12318 [15:30:02<5:48:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3600 max words - at ../dataset/shuffle-word-3600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8958/12318 [15:30:10<5:48:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8958/12318 [15:30:10<5:48:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 535 max words, 50 samples - at ../dataset/gen-word-535-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8959/12318 [15:30:13<5:48:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8959/12318 [15:30:13<5:48:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 570 max words, 50 samples - at ../dataset/gen-word-570-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8960/12318 [15:30:35<5:48:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8960/12318 [15:30:35<5:48:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 700 max words - at ../dataset/shuffle-word-700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8961/12318 [15:30:41<5:48:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8961/12318 [15:30:41<5:48:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 985 max words, 50 samples - at ../dataset/gen-word-985-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8962/12318 [15:30:50<5:48:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8962/12318 [15:30:50<5:48:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 510 max words, 50 samples - at ../dataset/gen-word-510-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8963/12318 [15:30:55<5:48:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8963/12318 [15:30:55<5:48:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 16 samples (1 token repeat) - 145 max words - at ../dataset/shuffle-word-145-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8964/12318 [15:30:57<5:48:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8964/12318 [15:30:57<5:48:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 340 max words - at ../dataset/shuffle-word-340-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8965/12318 [15:31:01<5:48:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8965/12318 [15:31:01<5:48:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 515 max words - at ../dataset/shuffle-word-515-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8966/12318 [15:31:04<5:48:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8966/12318 [15:31:04<5:48:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 435 max words - at ../dataset/shuffle-word-435-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8967/12318 [15:31:12<5:47:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8967/12318 [15:31:12<5:47:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 640 max words - at ../dataset/shuffle-word-640-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8968/12318 [15:31:18<5:47:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8968/12318 [15:31:18<5:47:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 775 max words, 50 samples - at ../dataset/gen-word-775-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8969/12318 [15:31:22<5:47:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8969/12318 [15:31:22<5:47:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 565 max words - at ../dataset/shuffle-word-565-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8970/12318 [15:31:30<5:47:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8970/12318 [15:31:30<5:47:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 415 max words - at ../dataset/shuffle-word-415-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8971/12318 [15:31:37<5:47:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8971/12318 [15:31:37<5:47:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 835 max words, 50 samples - at ../dataset/gen-word-835-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8972/12318 [15:31:39<5:47:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8972/12318 [15:31:39<5:47:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6700 max words - at ../dataset/shuffle-word-6700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8973/12318 [15:31:47<5:47:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8973/12318 [15:31:47<5:47:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 335 max words, 50 samples - at ../dataset/gen-word-335-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8974/12318 [15:31:52<5:47:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8974/12318 [15:31:52<5:47:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 620 max words, 50 samples - at ../dataset/gen-word-620-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8975/12318 [15:31:55<5:47:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8975/12318 [15:31:55<5:47:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 870 max words, 50 samples - at ../dataset/gen-word-870-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8976/12318 [15:32:01<5:47:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8976/12318 [15:32:01<5:47:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 890 max words - at ../dataset/shuffle-word-890-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8977/12318 [15:32:09<5:46:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8977/12318 [15:32:09<5:46:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 460 max words - at ../dataset/shuffle-word-460-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8978/12318 [15:32:14<5:46:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8978/12318 [15:32:14<5:46:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 765 max words - at ../dataset/shuffle-word-765-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8979/12318 [15:32:16<5:46:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8979/12318 [15:32:16<5:46:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 635 max words - at ../dataset/shuffle-word-635-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8980/12318 [15:32:22<5:46:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8980/12318 [15:32:22<5:46:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 500 max words, 50 samples - at ../dataset/gen-word-500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8981/12318 [15:32:27<5:46:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8981/12318 [15:32:27<5:46:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 9 samples (1 token repeat) - 290 max words - at ../dataset/shuffle-word-290-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8982/12318 [15:32:32<5:46:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8982/12318 [15:32:32<5:46:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 720 max words, 50 samples - at ../dataset/gen-word-720-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8983/12318 [15:32:37<5:46:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8983/12318 [15:32:37<5:46:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 850 max words - at ../dataset/shuffle-word-850-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8984/12318 [15:32:44<5:46:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8984/12318 [15:32:44<5:46:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 1000 max words - at ../dataset/shuffle-word-1000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8985/12318 [15:32:53<5:46:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8985/12318 [15:32:53<5:46:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3700 max words - at ../dataset/shuffle-word-3700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8986/12318 [15:33:00<5:45:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8986/12318 [15:33:00<5:45:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 160 max words, 50 samples - at ../dataset/gen-word-160-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8987/12318 [15:33:04<5:45:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8987/12318 [15:33:04<5:45:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 505 max words - at ../dataset/shuffle-word-505-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8988/12318 [15:33:05<5:45:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8988/12318 [15:33:05<5:45:42,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 30 samples (20 token repeat) - 2600 max words - at ../dataset/shuffle-word-2600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8989/12318 [15:33:10<5:45:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8989/12318 [15:33:10<5:45:35,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 910 max words - at ../dataset/shuffle-word-910-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8990/12318 [15:33:15<5:45:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8990/12318 [15:33:15<5:45:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 22 samples (20 token repeat) - 2700 max words - at ../dataset/shuffle-word-2700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8991/12318 [15:33:23<5:45:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8991/12318 [15:33:23<5:45:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 700 max words, 50 samples - at ../dataset/gen-word-700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8992/12318 [15:34:01<5:45:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8992/12318 [15:34:01<5:45:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5200 max words - at ../dataset/shuffle-word-5200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8993/12318 [15:34:02<5:45:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8993/12318 [15:34:02<5:45:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 710 max words - at ../dataset/shuffle-word-710-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8994/12318 [15:34:08<5:45:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8994/12318 [15:34:08<5:45:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 935 max words - at ../dataset/shuffle-word-935-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8995/12318 [15:34:14<5:45:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8995/12318 [15:34:14<5:45:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 885 max words - at ../dataset/shuffle-word-885-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8996/12318 [15:34:16<5:45:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8996/12318 [15:34:16<5:45:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3200 max words - at ../dataset/shuffle-word-3200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8997/12318 [15:34:25<5:44:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8997/12318 [15:34:25<5:44:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1800 max words - at ../dataset/shuffle-word-1800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8998/12318 [15:34:30<5:44:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8998/12318 [15:34:30<5:44:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 785 max words, 50 samples - at ../dataset/gen-word-785-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 8999/12318 [15:34:39<5:44:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 8999/12318 [15:34:39<5:44:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 475 max words - at ../dataset/shuffle-word-475-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9000/12318 [15:34:46<5:44:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9000/12318 [15:34:46<5:44:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 2200 max words - at ../dataset/shuffle-word-2200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9001/12318 [15:34:51<5:44:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9001/12318 [15:34:51<5:44:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6000 max words - at ../dataset/shuffle-word-6000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9002/12318 [15:34:56<5:44:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9002/12318 [15:34:56<5:44:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 645 max words, 50 samples - at ../dataset/gen-word-645-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9003/12318 [15:35:00<5:44:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9003/12318 [15:35:00<5:44:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 205 max words, 50 samples - at ../dataset/gen-word-205-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9004/12318 [15:35:05<5:44:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9004/12318 [15:35:05<5:44:10,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 650 max words - at ../dataset/shuffle-word-650-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9005/12318 [15:35:09<5:44:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9005/12318 [15:35:09<5:44:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 5 samples (1 token repeat) - 555 max words - at ../dataset/shuffle-word-555-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9006/12318 [15:35:14<5:43:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 875 max words, 50 samples - at ../dataset/gen-word-875-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9006/12318 [15:35:14<5:43:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 900 max words - at ../dataset/shuffle-word-900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9007/12318 [15:35:18<5:43:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9007/12318 [15:35:18<5:43:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4400 max words - at ../dataset/shuffle-word-4400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9008/12318 [15:35:24<5:43:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9008/12318 [15:35:24<5:43:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 965 max words - at ../dataset/shuffle-word-965-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9009/12318 [15:35:29<5:43:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9009/12318 [15:35:29<5:43:36,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 950 max words - at ../dataset/shuffle-word-950-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9010/12318 [15:35:33<5:43:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9010/12318 [15:35:33<5:43:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3500 max words - at ../dataset/shuffle-word-3500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9011/12318 [15:35:41<5:43:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9011/12318 [15:35:41<5:43:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 615 max words, 50 samples - at ../dataset/gen-word-615-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9012/12318 [15:35:49<5:43:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9012/12318 [15:35:49<5:43:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5500 max words - at ../dataset/shuffle-word-5500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9013/12318 [15:35:51<5:43:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9013/12318 [15:35:51<5:43:10,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 4 samples (1 token repeat) - 675 max words - at ../dataset/shuffle-word-675-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9014/12318 [15:35:52<5:43:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9014/12318 [15:35:52<5:43:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 6 samples (1 token repeat) - 410 max words - at ../dataset/shuffle-word-410-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9015/12318 [15:35:54<5:42:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9015/12318 [15:35:54<5:42:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 370 max words - at ../dataset/shuffle-word-370-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9016/12318 [15:35:57<5:42:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9016/12318 [15:35:57<5:42:47,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 7 samples (1 token repeat) - 385 max words - at ../dataset/shuffle-word-385-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9017/12318 [15:36:03<5:42:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9017/12318 [15:36:03<5:42:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 855 max words, 50 samples - at ../dataset/gen-word-855-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9018/12318 [15:36:06<5:42:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9018/12318 [15:36:06<5:42:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7500 max words - at ../dataset/shuffle-word-7500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9019/12318 [15:36:11<5:42:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9019/12318 [15:36:11<5:42:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3400 max words - at ../dataset/shuffle-word-3400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9020/12318 [15:36:14<5:42:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9020/12318 [15:36:14<5:42:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 40 samples (20 token repeat) - 1600 max words - at ../dataset/shuffle-word-1600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9021/12318 [15:36:16<5:42:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9021/12318 [15:36:16<5:42:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 880 max words - at ../dataset/shuffle-word-880-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9022/12318 [15:36:20<5:42:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9022/12318 [15:36:20<5:42:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6800 max words - at ../dataset/shuffle-word-6800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9023/12318 [15:36:24<5:41:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9023/12318 [15:36:24<5:41:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 930 max words - at ../dataset/shuffle-word-930-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9024/12318 [15:37:23<5:42:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9024/12318 [15:37:23<5:42:10,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6200 max words - at ../dataset/shuffle-word-6200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9025/12318 [15:37:31<5:42:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9025/12318 [15:37:31<5:42:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 3100 max words - at ../dataset/shuffle-word-3100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9026/12318 [15:37:35<5:41:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9026/12318 [15:37:35<5:41:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 780 max words, 50 samples - at ../dataset/gen-word-780-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9027/12318 [15:37:36<5:41:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9027/12318 [15:37:36<5:41:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5000 max words - at ../dataset/shuffle-word-5000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9028/12318 [15:37:43<5:41:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9028/12318 [15:37:43<5:41:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 21 samples (20 token repeat) - 2800 max words - at ../dataset/shuffle-word-2800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9029/12318 [15:37:48<5:41:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9029/12318 [15:37:48<5:41:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6500 max words - at ../dataset/shuffle-word-6500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9030/12318 [15:37:56<5:41:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9030/12318 [15:37:56<5:41:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6900 max words - at ../dataset/shuffle-word-6900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9031/12318 [15:37:58<5:41:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9031/12318 [15:37:58<5:41:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 925 max words, 50 samples - at ../dataset/gen-word-925-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9032/12318 [15:38:03<5:41:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9032/12318 [15:38:03<5:41:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 940 max words, 50 samples - at ../dataset/gen-word-940-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9033/12318 [15:38:05<5:41:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9033/12318 [15:38:05<5:41:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 680 max words, 50 samples - at ../dataset/gen-word-680-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9034/12318 [15:38:09<5:41:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9034/12318 [15:38:09<5:41:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 6100 max words - at ../dataset/shuffle-word-6100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9035/12318 [15:38:17<5:40:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9035/12318 [15:38:17<5:40:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 485 max words, 50 samples - at ../dataset/gen-word-485-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9036/12318 [15:38:24<5:40:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 840 max words - at ../dataset/shuffle-word-840-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9036/12318 [15:38:24<5:40:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 850 max words, 50 samples - at ../dataset/gen-word-850-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9037/12318 [15:38:27<5:40:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9037/12318 [15:38:28<5:40:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4300 max words - at ../dataset/shuffle-word-4300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9038/12318 [15:38:36<5:40:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 7800 max words - at ../dataset/shuffle-word-7800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9038/12318 [15:38:36<5:40:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 865 max words, 50 samples - at ../dataset/gen-word-865-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9039/12318 [15:38:40<5:40:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9039/12318 [15:38:40<5:40:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 3 samples (1 token repeat) - 915 max words - at ../dataset/shuffle-word-915-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9040/12318 [15:38:49<5:40:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9040/12318 [15:38:49<5:40:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5700 max words - at ../dataset/shuffle-word-5700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9041/12318 [15:38:53<5:40:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9041/12318 [15:38:53<5:40:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 5100 max words - at ../dataset/shuffle-word-5100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9042/12318 [15:39:00<5:40:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9042/12318 [15:39:00<5:40:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated a single JSONL file with 20 samples (20 token repeat) - 4500 max words - at ../dataset/shuffle-word-4500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9043/12318 [15:39:03<5:40:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9043/12318 [15:39:03<5:40:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1100 max words, 2000 samples - at ../dataset/gen-word-1100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9044/12318 [15:39:12<5:40:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9044/12318 [15:39:12<5:40:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1400 max words, 2000 samples - at ../dataset/gen-word-1400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9045/12318 [15:39:20<5:39:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9045/12318 [15:39:20<5:39:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1300 max words, 2000 samples - at ../dataset/gen-word-1300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9046/12318 [15:39:26<5:39:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9046/12318 [15:39:26<5:39:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1200 max words, 2000 samples - at ../dataset/gen-word-1200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9047/12318 [15:39:32<5:39:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9047/12318 [15:39:32<5:39:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1500 max words, 2000 samples - at ../dataset/gen-word-1500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9048/12318 [15:39:39<5:39:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9048/12318 [15:39:39<5:39:36,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1700 max words, 2000 samples - at ../dataset/gen-word-1700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9049/12318 [15:39:46<5:39:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9049/12318 [15:39:46<5:39:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1800 max words, 2000 samples - at ../dataset/gen-word-1800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9050/12318 [15:39:51<5:39:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9050/12318 [15:39:51<5:39:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1600 max words, 2000 samples - at ../dataset/gen-word-1600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9051/12318 [15:39:56<5:39:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9051/12318 [15:39:56<5:39:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 1900 max words, 2000 samples - at ../dataset/gen-word-1900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9052/12318 [15:40:02<5:39:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9052/12318 [15:40:02<5:39:10,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2100 max words, 2000 samples - at ../dataset/gen-word-2100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  73%|▋| 9053/12318 [15:40:08<5:39:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  73%|▋| 9053/12318 [15:40:08<5:39:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2000 max words, 2000 samples - at ../dataset/gen-word-2000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9054/12318 [15:40:13<5:38:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9054/12318 [15:40:13<5:38:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2200 max words, 2000 samples - at ../dataset/gen-word-2200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9055/12318 [15:40:15<5:38:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9055/12318 [15:40:15<5:38:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2300 max words, 2000 samples - at ../dataset/gen-word-2300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9056/12318 [15:40:37<5:38:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9056/12318 [15:40:37<5:38:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2500 max words, 2000 samples - at ../dataset/gen-word-2500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|���| 9057/12318 [15:40:39<5:38:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9057/12318 [15:40:39<5:38:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2400 max words, 2000 samples - at ../dataset/gen-word-2400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9058/12318 [15:40:44<5:38:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9058/12318 [15:40:44<5:38:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2600 max words, 2000 samples - at ../dataset/gen-word-2600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9059/12318 [15:40:49<5:38:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9059/12318 [15:40:49<5:38:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2800 max words, 2000 samples - at ../dataset/gen-word-2800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9060/12318 [15:40:50<5:38:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9060/12318 [15:40:50<5:38:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2700 max words, 2000 samples - at ../dataset/gen-word-2700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9061/12318 [15:40:54<5:38:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9061/12318 [15:40:54<5:38:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3300 max words, 2000 samples - at ../dataset/gen-word-3300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9062/12318 [15:40:57<5:38:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9062/12318 [15:40:57<5:38:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3000 max words, 2000 samples - at ../dataset/gen-word-3000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9063/12318 [15:40:59<5:37:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9063/12318 [15:40:59<5:37:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3100 max words, 2000 samples - at ../dataset/gen-word-3100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9064/12318 [15:41:05<5:37:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9064/12318 [15:41:05<5:37:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 2900 max words, 2000 samples - at ../dataset/gen-word-2900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9065/12318 [15:41:10<5:37:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9065/12318 [15:41:10<5:37:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3400 max words, 2000 samples - at ../dataset/gen-word-3400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9066/12318 [15:41:14<5:37:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9066/12318 [15:41:14<5:37:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4100 max words, 2000 samples - at ../dataset/gen-word-4100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9067/12318 [15:41:21<5:37:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9067/12318 [15:41:21<5:37:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3200 max words, 2000 samples - at ../dataset/gen-word-3200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9068/12318 [15:41:25<5:37:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9068/12318 [15:41:25<5:37:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3600 max words, 2000 samples - at ../dataset/gen-word-3600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9069/12318 [15:41:33<5:37:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9069/12318 [15:41:33<5:37:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3500 max words, 2000 samples - at ../dataset/gen-word-3500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9070/12318 [15:41:39<5:37:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9070/12318 [15:41:39<5:37:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3800 max words, 2000 samples - at ../dataset/gen-word-3800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9071/12318 [15:41:44<5:37:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9071/12318 [15:41:44<5:37:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4000 max words, 2000 samples - at ../dataset/gen-word-4000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9072/12318 [15:41:46<5:36:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9072/12318 [15:41:46<5:36:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3700 max words, 2000 samples - at ../dataset/gen-word-3700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9073/12318 [15:41:50<5:36:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 3900 max words, 2000 samples - at ../dataset/gen-word-3900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9073/12318 [15:41:50<5:36:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4500 max words, 2000 samples - at ../dataset/gen-word-4500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9074/12318 [15:41:55<5:36:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9074/12318 [15:41:55<5:36:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4300 max words, 2000 samples - at ../dataset/gen-word-4300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9075/12318 [15:42:03<5:36:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9075/12318 [15:42:03<5:36:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4400 max words, 2000 samples - at ../dataset/gen-word-4400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9076/12318 [15:42:10<5:36:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9076/12318 [15:42:10<5:36:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5300 max words, 2000 samples - at ../dataset/gen-word-5300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9077/12318 [15:42:18<5:36:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9077/12318 [15:42:18<5:36:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4600 max words, 2000 samples - at ../dataset/gen-word-4600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9078/12318 [15:42:21<5:36:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9078/12318 [15:42:21<5:36:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4800 max words, 2000 samples - at ../dataset/gen-word-4800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9079/12318 [15:42:24<5:36:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9079/12318 [15:42:24<5:36:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4200 max words, 2000 samples - at ../dataset/gen-word-4200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9080/12318 [15:42:26<5:36:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9080/12318 [15:42:26<5:36:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5000 max words, 2000 samples - at ../dataset/gen-word-5000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9081/12318 [15:42:35<5:35:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9081/12318 [15:42:35<5:35:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4700 max words, 2000 samples - at ../dataset/gen-word-4700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9082/12318 [15:42:39<5:35:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9082/12318 [15:42:39<5:35:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5200 max words, 2000 samples - at ../dataset/gen-word-5200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9083/12318 [15:42:45<5:35:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9083/12318 [15:42:45<5:35:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5700 max words, 2000 samples - at ../dataset/gen-word-5700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9084/12318 [15:42:49<5:35:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9084/12318 [15:42:49<5:35:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5600 max words, 2000 samples - at ../dataset/gen-word-5600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9085/12318 [15:42:57<5:35:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9085/12318 [15:42:57<5:35:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5100 max words, 2000 samples - at ../dataset/gen-word-5100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9086/12318 [15:43:01<5:35:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9086/12318 [15:43:01<5:35:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6800 max words, 2000 samples - at ../dataset/gen-word-6800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9087/12318 [15:43:08<5:35:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9087/12318 [15:43:08<5:35:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 4900 max words, 2000 samples - at ../dataset/gen-word-4900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9088/12318 [15:43:58<5:35:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9088/12318 [15:43:58<5:35:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5400 max words, 2000 samples - at ../dataset/gen-word-5400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9089/12318 [15:44:03<5:35:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9089/12318 [15:44:03<5:35:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5800 max words, 2000 samples - at ../dataset/gen-word-5800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9090/12318 [15:44:09<5:35:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9090/12318 [15:44:09<5:35:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6900 max words, 2000 samples - at ../dataset/gen-word-6900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9091/12318 [15:44:13<5:35:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9091/12318 [15:44:13<5:35:10,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6300 max words, 2000 samples - at ../dataset/gen-word-6300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9092/12318 [15:44:22<5:35:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9092/12318 [15:44:22<5:35:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6400 max words, 2000 samples - at ../dataset/gen-word-6400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9093/12318 [15:44:28<5:34:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9093/12318 [15:44:28<5:34:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5900 max words, 2000 samples - at ../dataset/gen-word-5900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9094/12318 [15:44:35<5:34:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9094/12318 [15:44:35<5:34:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6200 max words, 2000 samples - at ../dataset/gen-word-6200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9095/12318 [15:44:38<5:34:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9095/12318 [15:44:38<5:34:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 5500 max words, 2000 samples - at ../dataset/gen-word-5500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9096/12318 [15:44:41<5:34:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9096/12318 [15:44:41<5:34:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7800 max words, 2000 samples - at ../dataset/gen-word-7800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9097/12318 [15:44:50<5:34:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9097/12318 [15:44:50<5:34:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6500 max words, 2000 samples - at ../dataset/gen-word-6500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9098/12318 [15:44:56<5:34:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9098/12318 [15:44:56<5:34:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6700 max words, 2000 samples - at ../dataset/gen-word-6700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9099/12318 [15:45:05<5:34:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9099/12318 [15:45:05<5:34:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6100 max words, 2000 samples - at ../dataset/gen-word-6100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9100/12318 [15:45:13<5:34:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9100/12318 [15:45:13<5:34:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7000 max words, 2000 samples - at ../dataset/gen-word-7000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9101/12318 [15:45:16<5:34:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9101/12318 [15:45:16<5:34:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7200 max words, 2000 samples - at ../dataset/gen-word-7200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9102/12318 [15:45:18<5:34:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9102/12318 [15:45:18<5:34:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6000 max words, 2000 samples - at ../dataset/gen-word-6000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9103/12318 [15:45:26<5:33:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9103/12318 [15:45:26<5:33:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 6600 max words, 2000 samples - at ../dataset/gen-word-6600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9104/12318 [15:45:31<5:33:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9104/12318 [15:45:31<5:33:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7100 max words, 2000 samples - at ../dataset/gen-word-7100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9105/12318 [15:45:34<5:33:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9105/12318 [15:45:34<5:33:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7400 max words, 2000 samples - at ../dataset/gen-word-7400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9106/12318 [15:45:39<5:33:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9106/12318 [15:45:39<5:33:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7300 max words, 2000 samples - at ../dataset/gen-word-7300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9107/12318 [15:45:46<5:33:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9107/12318 [15:45:46<5:33:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7500 max words, 2000 samples - at ../dataset/gen-word-7500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9108/12318 [15:45:47<5:33:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9108/12318 [15:45:47<5:33:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7900 max words, 2000 samples - at ../dataset/gen-word-7900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9109/12318 [15:45:53<5:33:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9109/12318 [15:45:53<5:33:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 8000 max words, 2000 samples - at ../dataset/gen-word-8000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9110/12318 [15:45:54<5:33:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9110/12318 [15:45:54<5:33:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7700 max words, 2000 samples - at ../dataset/gen-word-7700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9111/12318 [15:46:01<5:32:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9111/12318 [15:46:01<5:32:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generated JSONL file with - 7600 max words, 2000 samples - at ../dataset/gen-word-7600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9112/12318 [15:46:07<5:32:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9112/12318 [15:46:07<5:32:53,  6.23s/it, v_num=e4xv, train/loss"
+      "## Done ##\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9113/12318 [15:46:11<5:32:46,  6.23s/it, v_num=e4xv, train/loss"
+      "total 6.1G\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9113/12318 [15:46:11<5:32:46,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  11K Sep  2 06:17 gen-word-10-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9114/12318 [15:46:16<5:32:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9114/12318 [15:46:16<5:32:39,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  56K Sep  2 06:17 gen-word-100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9115/12318 [15:46:18<5:32:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9115/12318 [15:46:18<5:32:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 487K Sep  2 06:17 gen-word-1000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9116/12318 [15:46:26<5:32:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9116/12318 [15:46:26<5:32:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  58K Sep  2 06:17 gen-word-105-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9117/12318 [15:46:33<5:32:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9117/12318 [15:46:33<5:32:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  58K Sep  2 06:17 gen-word-110-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9118/12318 [15:46:38<5:32:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9118/12318 [15:46:38<5:32:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  22M Sep  2 06:17 gen-word-1100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9119/12318 [15:46:44<5:32:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9119/12318 [15:46:44<5:32:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  62K Sep  2 06:17 gen-word-115-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9120/12318 [15:47:23<5:32:12,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  63K Sep  2 06:17 gen-word-120-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9120/12318 [15:47:23<5:32:12,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  23M Sep  2 06:17 gen-word-1200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9121/12318 [15:47:26<5:32:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9121/12318 [15:47:26<5:32:05,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  65K Sep  2 06:17 gen-word-125-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9122/12318 [15:47:34<5:31:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9122/12318 [15:47:34<5:31:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  68K Sep  2 06:17 gen-word-130-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9123/12318 [15:47:37<5:31:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9123/12318 [15:47:37<5:31:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  25M Sep  2 06:17 gen-word-1300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9124/12318 [15:47:42<5:31:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9124/12318 [15:47:42<5:31:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  72K Sep  2 06:17 gen-word-135-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9125/12318 [15:47:44<5:31:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  73K Sep  2 06:17 gen-word-140-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9125/12318 [15:47:44<5:31:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27M Sep  2 06:17 gen-word-1400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9126/12318 [15:47:50<5:31:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9126/12318 [15:47:50<5:31:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  76K Sep  2 06:17 gen-word-145-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9127/12318 [15:47:55<5:31:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9127/12318 [15:47:55<5:31:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  13K Sep  2 06:17 gen-word-15-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9128/12318 [15:48:04<5:31:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9128/12318 [15:48:04<5:31:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  81K Sep  2 06:17 gen-word-150-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9129/12318 [15:48:09<5:31:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9129/12318 [15:48:09<5:31:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  29M Sep  2 06:17 gen-word-1500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9130/12318 [15:48:17<5:31:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9130/12318 [15:48:17<5:31:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  82K Sep  2 06:17 gen-word-155-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9131/12318 [15:48:26<5:31:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9131/12318 [15:48:26<5:31:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  83K Sep  2 06:17 gen-word-160-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9132/12318 [15:48:32<5:30:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9132/12318 [15:48:32<5:30:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  31M Sep  2 06:17 gen-word-1600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9133/12318 [15:48:38<5:30:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9133/12318 [15:48:38<5:30:49,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  87K Sep  2 06:17 gen-word-165-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9134/12318 [15:48:46<5:30:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9134/12318 [15:48:46<5:30:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  86K Sep  2 06:17 gen-word-170-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9135/12318 [15:48:52<5:30:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9135/12318 [15:48:52<5:30:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  33M Sep  2 06:17 gen-word-1700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9136/12318 [15:48:58<5:30:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  88K Sep  2 06:17 gen-word-175-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9136/12318 [15:48:58<5:30:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  93K Sep  2 06:17 gen-word-180-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9137/12318 [15:49:06<5:30:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9137/12318 [15:49:06<5:30:25,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  35M Sep  2 06:17 gen-word-1800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9138/12318 [15:49:15<5:30:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9138/12318 [15:49:15<5:30:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  92K Sep  2 06:17 gen-word-185-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9139/12318 [15:49:23<5:30:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9139/12318 [15:49:23<5:30:14,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  99K Sep  2 06:17 gen-word-190-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9140/12318 [15:49:27<5:30:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9140/12318 [15:49:27<5:30:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  37M Sep  2 06:17 gen-word-1900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9141/12318 [15:49:35<5:30:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9141/12318 [15:49:35<5:30:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 102K Sep  2 06:17 gen-word-195-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9142/12318 [15:49:40<5:29:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9142/12318 [15:49:40<5:29:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  15K Sep  2 06:17 gen-word-20-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9143/12318 [15:49:45<5:29:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9143/12318 [15:49:45<5:29:48,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 103K Sep  2 06:17 gen-word-200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9144/12318 [15:49:52<5:29:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9144/12318 [15:49:52<5:29:42,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  39M Sep  2 06:17 gen-word-2000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9145/12318 [15:49:57<5:29:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9145/12318 [15:49:57<5:29:36,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 103K Sep  2 06:17 gen-word-205-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9146/12318 [15:50:02<5:29:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9146/12318 [15:50:02<5:29:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 108K Sep  2 06:17 gen-word-210-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9147/12318 [15:50:06<5:29:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9147/12318 [15:50:06<5:29:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  41M Sep  2 06:17 gen-word-2100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9148/12318 [15:50:15<5:29:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9148/12318 [15:50:15<5:29:17,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 113K Sep  2 06:17 gen-word-215-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9149/12318 [15:50:17<5:29:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9149/12318 [15:50:17<5:29:09,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 114K Sep  2 06:17 gen-word-220-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9150/12318 [15:50:24<5:29:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9150/12318 [15:50:24<5:29:03,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  42M Sep  2 06:17 gen-word-2200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9151/12318 [15:50:28<5:28:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9151/12318 [15:50:28<5:28:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 116K Sep  2 06:17 gen-word-225-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9152/12318 [15:50:46<5:28:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9152/12318 [15:50:46<5:28:54,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 114K Sep  2 06:17 gen-word-230-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9153/12318 [15:50:51<5:28:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9153/12318 [15:50:51<5:28:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  44M Sep  2 06:17 gen-word-2300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9154/12318 [15:50:56<5:28:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9154/12318 [15:50:56<5:28:41,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 117K Sep  2 06:17 gen-word-235-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9155/12318 [15:51:05<5:28:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9155/12318 [15:51:05<5:28:35,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 124K Sep  2 06:17 gen-word-240-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9156/12318 [15:51:13<5:28:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9156/12318 [15:51:13<5:28:30,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  46M Sep  2 06:17 gen-word-2400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9157/12318 [15:51:16<5:28:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9157/12318 [15:51:16<5:28:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 122K Sep  2 06:17 gen-word-245-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9158/12318 [15:51:22<5:28:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9158/12318 [15:51:22<5:28:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  17K Sep  2 06:17 gen-word-25-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9159/12318 [15:51:26<5:28:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9159/12318 [15:51:26<5:28:09,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 125K Sep  2 06:17 gen-word-250-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9160/12318 [15:51:30<5:28:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9160/12318 [15:51:30<5:28:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  48M Sep  2 06:17 gen-word-2500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9161/12318 [15:51:38<5:27:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9161/12318 [15:51:38<5:27:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 128K Sep  2 06:17 gen-word-255-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9162/12318 [15:51:42<5:27:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9162/12318 [15:51:42<5:27:49,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 134K Sep  2 06:17 gen-word-260-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9163/12318 [15:51:47<5:27:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9163/12318 [15:51:47<5:27:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  50M Sep  2 06:17 gen-word-2600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9164/12318 [15:51:50<5:27:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9164/12318 [15:51:50<5:27:36,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 135K Sep  2 06:17 gen-word-265-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9165/12318 [15:51:57<5:27:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9165/12318 [15:51:57<5:27:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 134K Sep  2 06:17 gen-word-270-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9166/12318 [15:52:01<5:27:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9166/12318 [15:52:01<5:27:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  52M Sep  2 06:17 gen-word-2700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9167/12318 [15:52:06<5:27:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9167/12318 [15:52:06<5:27:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 140K Sep  2 06:17 gen-word-275-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9168/12318 [15:52:14<5:27:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9168/12318 [15:52:14<5:27:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 143K Sep  2 06:17 gen-word-280-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9169/12318 [15:52:19<5:27:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9169/12318 [15:52:19<5:27:04,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  54M Sep  2 06:17 gen-word-2800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9170/12318 [15:52:27<5:26:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9170/12318 [15:52:27<5:26:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 141K Sep  2 06:17 gen-word-285-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9171/12318 [15:52:29<5:26:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9171/12318 [15:52:29<5:26:50,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 144K Sep  2 06:17 gen-word-290-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9172/12318 [15:52:33<5:26:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9172/12318 [15:52:33<5:26:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  56M Sep  2 06:17 gen-word-2900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9173/12318 [15:52:42<5:26:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9173/12318 [15:52:42<5:26:38,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 150K Sep  2 06:17 gen-word-295-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9174/12318 [15:52:43<5:26:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9174/12318 [15:52:43<5:26:30,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  20K Sep  2 06:17 gen-word-30-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9175/12318 [15:52:49<5:26:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9175/12318 [15:52:49<5:26:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 155K Sep  2 06:17 gen-word-300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  74%|▋| 9176/12318 [15:52:53<5:26:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  74%|▋| 9176/12318 [15:52:53<5:26:17,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  58M Sep  2 06:17 gen-word-3000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9177/12318 [15:53:00<5:26:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9177/12318 [15:53:00<5:26:11,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 155K Sep  2 06:17 gen-word-305-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9178/12318 [15:53:03<5:26:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9178/12318 [15:53:03<5:26:03,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 156K Sep  2 06:17 gen-word-310-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9179/12318 [15:53:12<5:25:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9179/12318 [15:53:12<5:25:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  60M Sep  2 06:17 gen-word-3100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9180/12318 [15:53:18<5:25:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9180/12318 [15:53:18<5:25:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 157K Sep  2 06:17 gen-word-315-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9181/12318 [15:53:26<5:25:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9181/12318 [15:53:26<5:25:46,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 159K Sep  2 06:17 gen-word-320-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9182/12318 [15:53:33<5:25:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9182/12318 [15:53:33<5:25:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  61M Sep  2 06:17 gen-word-3200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9183/12318 [15:53:41<5:25:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9183/12318 [15:53:41<5:25:35,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 166K Sep  2 06:17 gen-word-325-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9184/12318 [15:54:05<5:25:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9184/12318 [15:54:05<5:25:34,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 166K Sep  2 06:17 gen-word-330-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9185/12318 [15:54:09<5:25:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9185/12318 [15:54:09<5:25:27,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  63M Sep  2 06:17 gen-word-3300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9186/12318 [15:54:16<5:25:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9186/12318 [15:54:16<5:25:21,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 165K Sep  2 06:17 gen-word-335-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9187/12318 [15:54:18<5:25:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9187/12318 [15:54:18<5:25:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 171K Sep  2 06:17 gen-word-340-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9188/12318 [15:54:24<5:25:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9188/12318 [15:54:24<5:25:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  65M Sep  2 06:17 gen-word-3400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9189/12318 [15:54:31<5:25:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9189/12318 [15:54:31<5:25:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 171K Sep  2 06:17 gen-word-345-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9190/12318 [15:54:38<5:24:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9190/12318 [15:54:38<5:24:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  21K Sep  2 06:17 gen-word-35-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9191/12318 [15:54:47<5:24:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9191/12318 [15:54:47<5:24:50,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 175K Sep  2 06:17 gen-word-350-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9192/12318 [15:54:50<5:24:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9192/12318 [15:54:50<5:24:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  67M Sep  2 06:17 gen-word-3500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9193/12318 [15:54:57<5:24:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9193/12318 [15:54:57<5:24:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 175K Sep  2 06:17 gen-word-355-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9194/12318 [15:55:01<5:24:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9194/12318 [15:55:01<5:24:30,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 180K Sep  2 06:17 gen-word-360-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9195/12318 [15:55:08<5:24:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9195/12318 [15:55:08<5:24:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  69M Sep  2 06:17 gen-word-3600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9196/12318 [15:55:12<5:24:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9196/12318 [15:55:12<5:24:17,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 181K Sep  2 06:17 gen-word-365-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9197/12318 [15:55:17<5:24:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9197/12318 [15:55:17<5:24:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 185K Sep  2 06:17 gen-word-370-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9198/12318 [15:55:26<5:24:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9198/12318 [15:55:26<5:24:05,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  71M Sep  2 06:17 gen-word-3700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9199/12318 [15:55:30<5:23:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9199/12318 [15:55:30<5:23:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 190K Sep  2 06:17 gen-word-375-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9200/12318 [15:55:39<5:23:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9200/12318 [15:55:39<5:23:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 189K Sep  2 06:17 gen-word-380-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9201/12318 [15:55:40<5:23:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9201/12318 [15:55:40<5:23:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  73M Sep  2 06:17 gen-word-3800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9202/12318 [15:55:42<5:23:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9202/12318 [15:55:42<5:23:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 190K Sep  2 06:17 gen-word-385-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9203/12318 [15:55:48<5:23:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9203/12318 [15:55:48<5:23:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 195K Sep  2 06:17 gen-word-390-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9204/12318 [15:55:55<5:23:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9204/12318 [15:55:55<5:23:25,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  75M Sep  2 06:17 gen-word-3900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9205/12318 [15:55:56<5:23:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9205/12318 [15:55:56<5:23:17,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 199K Sep  2 06:17 gen-word-395-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9206/12318 [15:56:02<5:23:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9206/12318 [15:56:02<5:23:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  25K Sep  2 06:17 gen-word-40-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9207/12318 [15:56:10<5:23:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9207/12318 [15:56:10<5:23:05,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 200K Sep  2 06:17 gen-word-400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9208/12318 [15:56:15<5:22:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9208/12318 [15:56:15<5:22:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  77M Sep  2 06:17 gen-word-4000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9209/12318 [15:56:24<5:22:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9209/12318 [15:56:24<5:22:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 205K Sep  2 06:17 gen-word-405-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9210/12318 [15:56:25<5:22:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9210/12318 [15:56:25<5:22:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 208K Sep  2 06:17 gen-word-410-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9211/12318 [15:56:34<5:22:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9211/12318 [15:56:34<5:22:39,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  79M Sep  2 06:17 gen-word-4100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9212/12318 [15:56:38<5:22:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9212/12318 [15:56:38<5:22:32,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 206K Sep  2 06:17 gen-word-415-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9213/12318 [15:56:39<5:22:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9213/12318 [15:56:39<5:22:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 212K Sep  2 06:17 gen-word-420-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9214/12318 [15:56:48<5:22:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9214/12318 [15:56:48<5:22:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  80M Sep  2 06:17 gen-word-4200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9215/12318 [15:56:51<5:22:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9215/12318 [15:56:51<5:22:12,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 214K Sep  2 06:17 gen-word-425-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9216/12318 [15:57:14<5:22:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9216/12318 [15:57:14<5:22:11,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 214K Sep  2 06:17 gen-word-430-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9217/12318 [15:57:20<5:22:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9217/12318 [15:57:20<5:22:05,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  82M Sep  2 06:17 gen-word-4300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9218/12318 [15:57:23<5:21:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9218/12318 [15:57:23<5:21:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 214K Sep  2 06:17 gen-word-435-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9219/12318 [15:57:31<5:21:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9219/12318 [15:57:31<5:21:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 219K Sep  2 06:17 gen-word-440-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9220/12318 [15:57:32<5:21:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9220/12318 [15:57:32<5:21:44,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  84M Sep  2 06:17 gen-word-4400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9221/12318 [15:57:35<5:21:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9221/12318 [15:57:35<5:21:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 222K Sep  2 06:17 gen-word-445-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9222/12318 [15:57:39<5:21:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9222/12318 [15:57:39<5:21:30,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 gen-word-45-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9223/12318 [15:57:48<5:21:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9223/12318 [15:57:48<5:21:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 223K Sep  2 06:17 gen-word-450-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9224/12318 [15:57:51<5:21:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9224/12318 [15:57:51<5:21:17,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  86M Sep  2 06:17 gen-word-4500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9225/12318 [15:57:55<5:21:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9225/12318 [15:57:55<5:21:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 226K Sep  2 06:17 gen-word-455-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9226/12318 [15:57:57<5:21:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 231K Sep  2 06:17 gen-word-460-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9226/12318 [15:57:57<5:21:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  88M Sep  2 06:17 gen-word-4600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9227/12318 [15:58:05<5:20:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9227/12318 [15:58:05<5:20:57,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 233K Sep  2 06:17 gen-word-465-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9228/12318 [15:58:09<5:20:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9228/12318 [15:58:09<5:20:50,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 230K Sep  2 06:17 gen-word-470-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9229/12318 [15:58:12<5:20:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9229/12318 [15:58:12<5:20:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  90M Sep  2 06:17 gen-word-4700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9230/12318 [15:58:15<5:20:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9230/12318 [15:58:15<5:20:35,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 238K Sep  2 06:17 gen-word-475-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9231/12318 [15:58:22<5:20:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9231/12318 [15:58:22<5:20:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 236K Sep  2 06:17 gen-word-480-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9232/12318 [15:58:30<5:20:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9232/12318 [15:58:30<5:20:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  92M Sep  2 06:17 gen-word-4800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9233/12318 [15:58:36<5:20:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9233/12318 [15:58:36<5:20:17,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 241K Sep  2 06:17 gen-word-485-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9234/12318 [15:58:38<5:20:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9234/12318 [15:58:38<5:20:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 239K Sep  2 06:17 gen-word-490-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9235/12318 [15:58:39<5:20:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9235/12318 [15:58:39<5:20:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  94M Sep  2 06:17 gen-word-4900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9236/12318 [15:58:47<5:19:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 245K Sep  2 06:17 gen-word-495-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9236/12318 [15:58:47<5:19:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 7.3K Sep  2 06:17 gen-word-5-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9237/12318 [15:58:48<5:19:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9237/12318 [15:58:48<5:19:48,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  31K Sep  2 06:17 gen-word-50-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▋| 9238/12318 [15:58:55<5:19:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▋| 9238/12318 [15:58:55<5:19:42,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 248K Sep  2 06:17 gen-word-500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9239/12318 [15:58:57<5:19:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9239/12318 [15:58:57<5:19:35,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  96M Sep  2 06:17 gen-word-5000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9240/12318 [15:59:05<5:19:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9240/12318 [15:59:05<5:19:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 252K Sep  2 06:17 gen-word-505-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9241/12318 [15:59:10<5:19:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9241/12318 [15:59:10<5:19:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 256K Sep  2 06:17 gen-word-510-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9242/12318 [15:59:17<5:19:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9242/12318 [15:59:17<5:19:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  98M Sep  2 06:17 gen-word-5100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9243/12318 [15:59:23<5:19:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9243/12318 [15:59:23<5:19:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 261K Sep  2 06:17 gen-word-515-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9244/12318 [15:59:27<5:19:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9244/12318 [15:59:27<5:19:03,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 262K Sep  2 06:17 gen-word-520-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9245/12318 [15:59:30<5:18:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9245/12318 [15:59:30<5:18:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  99M Sep  2 06:17 gen-word-5200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9246/12318 [15:59:36<5:18:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9246/12318 [15:59:36<5:18:49,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 261K Sep  2 06:17 gen-word-525-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9247/12318 [15:59:41<5:18:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9247/12318 [15:59:41<5:18:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 262K Sep  2 06:17 gen-word-530-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9248/12318 [16:00:34<5:18:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9248/12318 [16:00:34<5:18:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 101M Sep  2 06:17 gen-word-5300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9249/12318 [16:00:42<5:18:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9249/12318 [16:00:42<5:18:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 266K Sep  2 06:17 gen-word-535-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9250/12318 [16:00:46<5:18:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9250/12318 [16:00:46<5:18:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 264K Sep  2 06:17 gen-word-540-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9251/12318 [16:00:52<5:18:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9251/12318 [16:00:52<5:18:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 103M Sep  2 06:17 gen-word-5400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9252/12318 [16:01:01<5:18:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9252/12318 [16:01:01<5:18:28,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 273K Sep  2 06:17 gen-word-545-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9253/12318 [16:01:06<5:18:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9253/12318 [16:01:06<5:18:21,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  31K Sep  2 06:17 gen-word-55-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9254/12318 [16:01:10<5:18:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9254/12318 [16:01:10<5:18:14,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 277K Sep  2 06:17 gen-word-550-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9255/12318 [16:01:17<5:18:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9255/12318 [16:01:17<5:18:08,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 105M Sep  2 06:17 gen-word-5500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9256/12318 [16:01:24<5:18:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9256/12318 [16:01:24<5:18:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 278K Sep  2 06:17 gen-word-555-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9257/12318 [16:01:29<5:17:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9257/12318 [16:01:29<5:17:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 272K Sep  2 06:17 gen-word-560-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9258/12318 [16:01:36<5:17:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9258/12318 [16:01:36<5:17:50,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 107M Sep  2 06:17 gen-word-5600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9259/12318 [16:01:41<5:17:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9259/12318 [16:01:41<5:17:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 282K Sep  2 06:17 gen-word-565-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9260/12318 [16:01:46<5:17:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9260/12318 [16:01:46<5:17:36,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 283K Sep  2 06:17 gen-word-570-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9261/12318 [16:01:48<5:17:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9261/12318 [16:01:48<5:17:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 109M Sep  2 06:17 gen-word-5700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9262/12318 [16:01:50<5:17:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9262/12318 [16:01:50<5:17:21,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 285K Sep  2 06:17 gen-word-575-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9263/12318 [16:01:57<5:17:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9263/12318 [16:01:57<5:17:15,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 290K Sep  2 06:17 gen-word-580-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9264/12318 [16:02:03<5:17:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9264/12318 [16:02:03<5:17:09,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 111M Sep  2 06:17 gen-word-5800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9265/12318 [16:02:08<5:17:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9265/12318 [16:02:08<5:17:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 286K Sep  2 06:17 gen-word-585-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9266/12318 [16:02:12<5:16:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9266/12318 [16:02:12<5:16:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 290K Sep  2 06:17 gen-word-590-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9267/12318 [16:02:19<5:16:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9267/12318 [16:02:19<5:16:49,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 113M Sep  2 06:17 gen-word-5900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9268/12318 [16:02:26<5:16:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9268/12318 [16:02:26<5:16:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 291K Sep  2 06:17 gen-word-595-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9269/12318 [16:02:29<5:16:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9269/12318 [16:02:29<5:16:36,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  35K Sep  2 06:17 gen-word-60-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9270/12318 [16:02:30<5:16:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9270/12318 [16:02:30<5:16:28,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 302K Sep  2 06:17 gen-word-600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9271/12318 [16:02:39<5:16:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9271/12318 [16:02:39<5:16:23,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 115M Sep  2 06:17 gen-word-6000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9272/12318 [16:02:43<5:16:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9272/12318 [16:02:43<5:16:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 298K Sep  2 06:17 gen-word-605-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9273/12318 [16:02:46<5:16:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9273/12318 [16:02:46<5:16:08,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 304K Sep  2 06:17 gen-word-610-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9274/12318 [16:02:51<5:16:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9274/12318 [16:02:51<5:16:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 117M Sep  2 06:17 gen-word-6100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9275/12318 [16:02:55<5:15:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9275/12318 [16:02:55<5:15:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 306K Sep  2 06:17 gen-word-615-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9276/12318 [16:03:02<5:15:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9276/12318 [16:03:02<5:15:49,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 304K Sep  2 06:17 gen-word-620-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9277/12318 [16:03:08<5:15:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9277/12318 [16:03:08<5:15:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 118M Sep  2 06:17 gen-word-6200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9278/12318 [16:03:15<5:15:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9278/12318 [16:03:15<5:15:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 306K Sep  2 06:17 gen-word-625-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9279/12318 [16:03:23<5:15:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9279/12318 [16:03:23<5:15:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 310K Sep  2 06:17 gen-word-630-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9280/12318 [16:04:01<5:15:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9280/12318 [16:04:01<5:15:35,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 120M Sep  2 06:17 gen-word-6300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9281/12318 [16:04:07<5:15:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9281/12318 [16:04:07<5:15:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 312K Sep  2 06:17 gen-word-635-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9282/12318 [16:04:11<5:15:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9282/12318 [16:04:11<5:15:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 317K Sep  2 06:17 gen-word-640-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9283/12318 [16:04:14<5:15:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9283/12318 [16:04:14<5:15:15,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 122M Sep  2 06:17 gen-word-6400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9284/12318 [16:04:17<5:15:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9284/12318 [16:04:17<5:15:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 322K Sep  2 06:17 gen-word-645-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9285/12318 [16:04:26<5:15:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9285/12318 [16:04:26<5:15:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  36K Sep  2 06:17 gen-word-65-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9286/12318 [16:04:32<5:14:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9286/12318 [16:04:32<5:14:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 322K Sep  2 06:17 gen-word-650-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9287/12318 [16:04:36<5:14:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9287/12318 [16:04:36<5:14:49,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 124M Sep  2 06:17 gen-word-6500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9288/12318 [16:04:38<5:14:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9288/12318 [16:04:38<5:14:41,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 328K Sep  2 06:17 gen-word-655-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9289/12318 [16:04:41<5:14:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9289/12318 [16:04:41<5:14:34,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 325K Sep  2 06:17 gen-word-660-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9290/12318 [16:04:47<5:14:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9290/12318 [16:04:47<5:14:27,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 126M Sep  2 06:17 gen-word-6600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9291/12318 [16:04:53<5:14:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9291/12318 [16:04:53<5:14:21,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 329K Sep  2 06:17 gen-word-665-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9292/12318 [16:04:55<5:14:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9292/12318 [16:04:55<5:14:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 334K Sep  2 06:17 gen-word-670-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9293/12318 [16:05:01<5:14:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9293/12318 [16:05:01<5:14:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 128M Sep  2 06:17 gen-word-6700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9294/12318 [16:05:08<5:14:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9294/12318 [16:05:08<5:14:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 332K Sep  2 06:17 gen-word-675-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9295/12318 [16:05:09<5:13:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9295/12318 [16:05:09<5:13:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 333K Sep  2 06:17 gen-word-680-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9296/12318 [16:05:17<5:13:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9296/12318 [16:05:17<5:13:48,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 130M Sep  2 06:17 gen-word-6800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9297/12318 [16:05:21<5:13:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9297/12318 [16:05:21<5:13:41,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 341K Sep  2 06:17 gen-word-685-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9298/12318 [16:05:22<5:13:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9298/12318 [16:05:22<5:13:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 338K Sep  2 06:17 gen-word-690-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9299/12318 [16:05:26<5:13:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9299/12318 [16:05:26<5:13:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 132M Sep  2 06:17 gen-word-6900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  75%|▊| 9300/12318 [16:05:34<5:13:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  75%|▊| 9300/12318 [16:05:34<5:13:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 340K Sep  2 06:17 gen-word-695-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9301/12318 [16:05:43<5:13:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9301/12318 [16:05:43<5:13:15,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  39K Sep  2 06:17 gen-word-70-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9302/12318 [16:05:48<5:13:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9302/12318 [16:05:48<5:13:08,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 342K Sep  2 06:17 gen-word-700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9303/12318 [16:05:50<5:13:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9303/12318 [16:05:50<5:13:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 134M Sep  2 06:17 gen-word-7000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9304/12318 [16:05:59<5:12:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9304/12318 [16:05:59<5:12:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 351K Sep  2 06:17 gen-word-705-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9305/12318 [16:06:03<5:12:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9305/12318 [16:06:03<5:12:48,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 347K Sep  2 06:17 gen-word-710-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9306/12318 [16:06:06<5:12:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9306/12318 [16:06:06<5:12:41,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 136M Sep  2 06:17 gen-word-7100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9307/12318 [16:06:15<5:12:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9307/12318 [16:06:15<5:12:36,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 352K Sep  2 06:17 gen-word-715-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9308/12318 [16:06:17<5:12:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9308/12318 [16:06:17<5:12:28,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 355K Sep  2 06:17 gen-word-720-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9309/12318 [16:06:24<5:12:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9309/12318 [16:06:24<5:12:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 137M Sep  2 06:17 gen-word-7200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9310/12318 [16:06:30<5:12:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9310/12318 [16:06:30<5:12:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 354K Sep  2 06:17 gen-word-725-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9311/12318 [16:06:36<5:12:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9311/12318 [16:06:36<5:12:09,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 358K Sep  2 06:17 gen-word-730-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9312/12318 [16:07:10<5:12:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9312/12318 [16:07:10<5:12:12,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 139M Sep  2 06:17 gen-word-7300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9313/12318 [16:07:13<5:12:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9313/12318 [16:07:13<5:12:05,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 372K Sep  2 06:17 gen-word-735-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9314/12318 [16:07:19<5:11:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9314/12318 [16:07:19<5:11:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 363K Sep  2 06:17 gen-word-740-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9315/12318 [16:07:25<5:11:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9315/12318 [16:07:25<5:11:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 141M Sep  2 06:17 gen-word-7400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9316/12318 [16:07:30<5:11:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9316/12318 [16:07:30<5:11:46,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 364K Sep  2 06:17 gen-word-745-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9317/12318 [16:07:39<5:11:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9317/12318 [16:07:39<5:11:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  42K Sep  2 06:17 gen-word-75-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9318/12318 [16:07:43<5:11:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9318/12318 [16:07:43<5:11:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 372K Sep  2 06:17 gen-word-750-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9319/12318 [16:07:48<5:11:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9319/12318 [16:07:48<5:11:27,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 143M Sep  2 06:17 gen-word-7500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9320/12318 [16:07:53<5:11:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9320/12318 [16:07:53<5:11:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 366K Sep  2 06:17 gen-word-755-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9321/12318 [16:07:57<5:11:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9321/12318 [16:07:57<5:11:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 376K Sep  2 06:17 gen-word-760-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9322/12318 [16:08:06<5:11:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9322/12318 [16:08:06<5:11:08,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 145M Sep  2 06:17 gen-word-7600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9323/12318 [16:08:11<5:11:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9323/12318 [16:08:11<5:11:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 372K Sep  2 06:17 gen-word-765-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9324/12318 [16:08:17<5:10:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9324/12318 [16:08:17<5:10:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 381K Sep  2 06:17 gen-word-770-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9325/12318 [16:08:18<5:10:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9325/12318 [16:08:18<5:10:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 147M Sep  2 06:17 gen-word-7700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9326/12318 [16:08:20<5:10:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9326/12318 [16:08:20<5:10:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 387K Sep  2 06:17 gen-word-775-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9327/12318 [16:08:21<5:10:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9327/12318 [16:08:21<5:10:32,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 381K Sep  2 06:17 gen-word-780-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9328/12318 [16:08:23<5:10:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9328/12318 [16:08:23<5:10:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 149M Sep  2 06:17 gen-word-7800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9329/12318 [16:08:31<5:10:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9329/12318 [16:08:31<5:10:18,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 381K Sep  2 06:17 gen-word-785-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9330/12318 [16:08:40<5:10:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9330/12318 [16:08:40<5:10:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 390K Sep  2 06:17 gen-word-790-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9331/12318 [16:08:45<5:10:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 151M Sep  2 06:17 gen-word-7900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9331/12318 [16:08:46<5:10:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 391K Sep  2 06:17 gen-word-795-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9332/12318 [16:08:48<5:09:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9332/12318 [16:08:48<5:09:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  44K Sep  2 06:17 gen-word-80-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9333/12318 [16:08:54<5:09:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9333/12318 [16:08:54<5:09:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 391K Sep  2 06:17 gen-word-800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9334/12318 [16:09:01<5:09:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9334/12318 [16:09:01<5:09:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 153M Sep  2 06:17 gen-word-8000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9335/12318 [16:09:04<5:09:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9335/12318 [16:09:04<5:09:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 396K Sep  2 06:17 gen-word-805-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9336/12318 [16:09:09<5:09:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9336/12318 [16:09:09<5:09:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 400K Sep  2 06:17 gen-word-810-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9337/12318 [16:09:13<5:09:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9337/12318 [16:09:13<5:09:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 398K Sep  2 06:17 gen-word-815-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9338/12318 [16:09:20<5:09:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9338/12318 [16:09:20<5:09:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 409K Sep  2 06:17 gen-word-820-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9339/12318 [16:09:25<5:09:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9339/12318 [16:09:25<5:09:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 403K Sep  2 06:17 gen-word-825-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9340/12318 [16:09:32<5:09:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9340/12318 [16:09:32<5:09:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 409K Sep  2 06:17 gen-word-830-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9341/12318 [16:09:40<5:09:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 411K Sep  2 06:17 gen-word-835-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9341/12318 [16:09:40<5:09:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 414K Sep  2 06:17 gen-word-840-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9342/12318 [16:09:42<5:08:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9342/12318 [16:09:42<5:08:54,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 413K Sep  2 06:17 gen-word-845-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9343/12318 [16:09:44<5:08:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9343/12318 [16:09:44<5:08:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  46K Sep  2 06:17 gen-word-85-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9344/12318 [16:10:29<5:08:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9344/12318 [16:10:29<5:08:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 412K Sep  2 06:17 gen-word-850-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9345/12318 [16:10:35<5:08:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9345/12318 [16:10:35<5:08:46,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 418K Sep  2 06:17 gen-word-855-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9346/12318 [16:10:36<5:08:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9346/12318 [16:10:36<5:08:39,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 417K Sep  2 06:17 gen-word-860-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9347/12318 [16:10:45<5:08:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9347/12318 [16:10:45<5:08:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 432K Sep  2 06:17 gen-word-865-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9348/12318 [16:10:49<5:08:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9348/12318 [16:10:49<5:08:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 429K Sep  2 06:17 gen-word-870-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9349/12318 [16:10:51<5:08:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9349/12318 [16:10:51<5:08:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 429K Sep  2 06:17 gen-word-875-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9350/12318 [16:10:54<5:08:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9350/12318 [16:10:54<5:08:11,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 432K Sep  2 06:17 gen-word-880-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9351/12318 [16:11:01<5:08:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9351/12318 [16:11:01<5:08:05,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 437K Sep  2 06:17 gen-word-885-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9352/12318 [16:11:06<5:07:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9352/12318 [16:11:06<5:07:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 432K Sep  2 06:17 gen-word-890-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9353/12318 [16:11:08<5:07:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9353/12318 [16:11:08<5:07:51,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 446K Sep  2 06:17 gen-word-895-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9354/12318 [16:11:10<5:07:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9354/12318 [16:11:10<5:07:44,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  49K Sep  2 06:17 gen-word-90-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9355/12318 [16:11:17<5:07:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9355/12318 [16:11:17<5:07:38,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 448K Sep  2 06:17 gen-word-900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9356/12318 [16:11:24<5:07:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9356/12318 [16:11:24<5:07:32,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 448K Sep  2 06:17 gen-word-905-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9357/12318 [16:11:26<5:07:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9357/12318 [16:11:26<5:07:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 441K Sep  2 06:17 gen-word-910-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9358/12318 [16:11:32<5:07:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9358/12318 [16:11:32<5:07:18,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 449K Sep  2 06:17 gen-word-915-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9359/12318 [16:11:35<5:07:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9359/12318 [16:11:35<5:07:11,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 450K Sep  2 06:17 gen-word-920-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9360/12318 [16:11:39<5:07:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9360/12318 [16:11:39<5:07:04,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 460K Sep  2 06:17 gen-word-925-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9361/12318 [16:11:48<5:06:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9361/12318 [16:11:48<5:06:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 464K Sep  2 06:17 gen-word-930-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9362/12318 [16:11:50<5:06:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9362/12318 [16:11:50<5:06:51,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 458K Sep  2 06:17 gen-word-935-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9363/12318 [16:11:55<5:06:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9363/12318 [16:11:55<5:06:44,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 465K Sep  2 06:17 gen-word-940-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9364/12318 [16:11:57<5:06:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9364/12318 [16:11:57<5:06:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 467K Sep  2 06:17 gen-word-945-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9365/12318 [16:12:01<5:06:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9365/12318 [16:12:01<5:06:30,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  53K Sep  2 06:17 gen-word-95-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9366/12318 [16:12:10<5:06:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9366/12318 [16:12:10<5:06:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 466K Sep  2 06:17 gen-word-950-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9367/12318 [16:12:15<5:06:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9367/12318 [16:12:15<5:06:18,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 464K Sep  2 06:17 gen-word-955-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9368/12318 [16:12:22<5:06:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9368/12318 [16:12:22<5:06:12,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 473K Sep  2 06:17 gen-word-960-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9369/12318 [16:12:26<5:06:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9369/12318 [16:12:26<5:06:05,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 474K Sep  2 06:17 gen-word-965-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9370/12318 [16:12:29<5:05:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9370/12318 [16:12:29<5:05:57,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 475K Sep  2 06:17 gen-word-970-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9371/12318 [16:12:38<5:05:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9371/12318 [16:12:38<5:05:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 477K Sep  2 06:17 gen-word-975-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9372/12318 [16:12:41<5:05:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9372/12318 [16:12:41<5:05:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 483K Sep  2 06:17 gen-word-980-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9373/12318 [16:12:43<5:05:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9373/12318 [16:12:43<5:05:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 485K Sep  2 06:17 gen-word-985-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9374/12318 [16:12:48<5:05:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9374/12318 [16:12:48<5:05:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 486K Sep  2 06:17 gen-word-990-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9375/12318 [16:12:53<5:05:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9375/12318 [16:12:53<5:05:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 486K Sep  2 06:17 gen-word-995-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9376/12318 [16:13:51<5:05:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9376/12318 [16:13:51<5:05:34,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  52K Sep  2 06:17 shuffle-word-10-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9377/12318 [16:13:56<5:05:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9377/12318 [16:13:56<5:05:27,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9378/12318 [16:13:59<5:05:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9378/12318 [16:13:59<5:05:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-1000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9379/12318 [16:14:08<5:05:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9379/12318 [16:14:08<5:05:15,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-105-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9380/12318 [16:14:17<5:05:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9380/12318 [16:14:17<5:05:09,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-110-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9381/12318 [16:14:19<5:05:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9381/12318 [16:14:19<5:05:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 523K Sep  2 06:17 shuffle-word-1100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9382/12318 [16:14:22<5:04:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9382/12318 [16:14:22<5:04:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-115-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9383/12318 [16:14:28<5:04:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9383/12318 [16:14:28<5:04:48,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-120-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9384/12318 [16:14:32<5:04:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9384/12318 [16:14:32<5:04:42,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 523K Sep  2 06:17 shuffle-word-1200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9385/12318 [16:14:36<5:04:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9385/12318 [16:14:36<5:04:35,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-125-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9386/12318 [16:14:44<5:04:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9386/12318 [16:14:44<5:04:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-130-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9387/12318 [16:14:52<5:04:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9387/12318 [16:14:52<5:04:23,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 519K Sep  2 06:17 shuffle-word-1300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9388/12318 [16:14:56<5:04:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9388/12318 [16:14:56<5:04:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-135-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9389/12318 [16:15:03<5:04:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9389/12318 [16:15:03<5:04:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-140-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9390/12318 [16:15:11<5:04:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9390/12318 [16:15:11<5:04:05,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 518K Sep  2 06:17 shuffle-word-1400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9391/12318 [16:15:19<5:03:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9391/12318 [16:15:19<5:03:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-145-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9392/12318 [16:15:26<5:03:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9392/12318 [16:15:26<5:03:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  45K Sep  2 06:17 shuffle-word-15-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9393/12318 [16:15:28<5:03:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9393/12318 [16:15:28<5:03:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-150-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9394/12318 [16:15:36<5:03:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9394/12318 [16:15:36<5:03:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-1500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9395/12318 [16:15:42<5:03:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9395/12318 [16:15:42<5:03:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-155-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9396/12318 [16:15:44<5:03:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9396/12318 [16:15:44<5:03:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-160-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9397/12318 [16:15:50<5:03:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9397/12318 [16:15:50<5:03:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-1600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9398/12318 [16:15:59<5:03:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9398/12318 [16:15:59<5:03:14,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-165-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9399/12318 [16:16:02<5:03:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9399/12318 [16:16:02<5:03:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-170-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9400/12318 [16:16:10<5:03:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9400/12318 [16:16:10<5:03:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 522K Sep  2 06:17 shuffle-word-1700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9401/12318 [16:16:12<5:02:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9401/12318 [16:16:12<5:02:54,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-175-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9402/12318 [16:16:16<5:02:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9402/12318 [16:16:16<5:02:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-180-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9403/12318 [16:16:19<5:02:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9403/12318 [16:16:19<5:02:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 522K Sep  2 06:17 shuffle-word-1800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9404/12318 [16:16:23<5:02:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9404/12318 [16:16:23<5:02:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-185-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9405/12318 [16:16:26<5:02:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9405/12318 [16:16:26<5:02:25,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-190-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9406/12318 [16:16:34<5:02:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|��| 9406/12318 [16:16:34<5:02:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-1900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9407/12318 [16:16:43<5:02:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9407/12318 [16:16:43<5:02:14,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-195-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9408/12318 [16:17:14<5:02:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9408/12318 [16:17:14<5:02:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  40K Sep  2 06:17 shuffle-word-20-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9409/12318 [16:17:20<5:02:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9409/12318 [16:17:20<5:02:09,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9410/12318 [16:17:25<5:02:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9410/12318 [16:17:25<5:02:03,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 521K Sep  2 06:17 shuffle-word-2000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9411/12318 [16:17:29<5:01:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9411/12318 [16:17:29<5:01:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-205-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9412/12318 [16:17:34<5:01:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9412/12318 [16:17:34<5:01:49,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-210-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9413/12318 [16:17:42<5:01:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9413/12318 [16:17:42<5:01:44,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 522K Sep  2 06:17 shuffle-word-2100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9414/12318 [16:17:48<5:01:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9414/12318 [16:17:48<5:01:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-215-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9415/12318 [16:17:54<5:01:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9415/12318 [16:17:54<5:01:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-220-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9416/12318 [16:18:01<5:01:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9416/12318 [16:18:01<5:01:25,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-2200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9417/12318 [16:18:06<5:01:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9417/12318 [16:18:06<5:01:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-225-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9418/12318 [16:18:09<5:01:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9418/12318 [16:18:09<5:01:11,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-230-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9419/12318 [16:18:13<5:01:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9419/12318 [16:18:13<5:01:04,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 520K Sep  2 06:17 shuffle-word-2300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9420/12318 [16:18:17<5:00:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9420/12318 [16:18:17<5:00:57,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-235-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9421/12318 [16:18:25<5:00:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9421/12318 [16:18:25<5:00:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-240-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9422/12318 [16:18:31<5:00:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9422/12318 [16:18:31<5:00:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 522K Sep  2 06:17 shuffle-word-2400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  76%|▊| 9423/12318 [16:18:34<5:00:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  76%|▊| 9423/12318 [16:18:34<5:00:38,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-245-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9424/12318 [16:18:39<5:00:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9424/12318 [16:18:39<5:00:32,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  35K Sep  2 06:17 shuffle-word-25-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9425/12318 [16:18:48<5:00:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9425/12318 [16:18:48<5:00:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-250-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9426/12318 [16:18:54<5:00:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9426/12318 [16:18:54<5:00:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 518K Sep  2 06:17 shuffle-word-2500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9427/12318 [16:18:59<5:00:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9427/12318 [16:18:59<5:00:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-255-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9428/12318 [16:19:07<5:00:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9428/12318 [16:19:07<5:00:08,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-260-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9429/12318 [16:19:13<5:00:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9429/12318 [16:19:13<5:00:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 516K Sep  2 06:17 shuffle-word-2600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9430/12318 [16:19:21<4:59:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9430/12318 [16:19:21<4:59:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-265-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9431/12318 [16:19:29<4:59:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9431/12318 [16:19:29<4:59:50,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-270-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9432/12318 [16:19:34<4:59:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9432/12318 [16:19:34<4:59:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 510K Sep  2 06:17 shuffle-word-2700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9433/12318 [16:19:37<4:59:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9433/12318 [16:19:37<4:59:36,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-275-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9434/12318 [16:19:46<4:59:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9434/12318 [16:19:46<4:59:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-280-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9435/12318 [16:19:51<4:59:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9435/12318 [16:19:51<4:59:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-2800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9436/12318 [16:19:56<4:59:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9436/12318 [16:19:56<4:59:17,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-285-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9437/12318 [16:20:00<4:59:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9437/12318 [16:20:00<4:59:11,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-290-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9438/12318 [16:20:09<4:59:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9438/12318 [16:20:09<4:59:05,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-2900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9439/12318 [16:20:16<4:58:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9439/12318 [16:20:16<4:58:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-295-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9440/12318 [16:20:33<4:58:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9440/12318 [16:20:33<4:58:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  34K Sep  2 06:17 shuffle-word-30-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9441/12318 [16:20:42<4:58:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9441/12318 [16:20:42<4:58:51,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9442/12318 [16:20:50<4:58:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9442/12318 [16:20:50<4:58:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9443/12318 [16:20:56<4:58:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9443/12318 [16:20:56<4:58:39,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-305-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9444/12318 [16:20:59<4:58:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9444/12318 [16:20:59<4:58:32,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-310-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9445/12318 [16:21:05<4:58:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9445/12318 [16:21:05<4:58:25,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9446/12318 [16:21:12<4:58:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9446/12318 [16:21:12<4:58:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-315-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9447/12318 [16:21:18<4:58:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9447/12318 [16:21:18<4:58:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-320-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9448/12318 [16:21:24<4:58:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9448/12318 [16:21:24<4:58:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9449/12318 [16:21:31<4:58:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9449/12318 [16:21:31<4:58:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-325-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9450/12318 [16:21:38<4:57:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9450/12318 [16:21:38<4:57:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-330-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9451/12318 [16:21:47<4:57:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9451/12318 [16:21:47<4:57:49,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-3300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9452/12318 [16:21:56<4:57:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9452/12318 [16:21:56<4:57:44,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-335-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9453/12318 [16:22:02<4:57:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9453/12318 [16:22:02<4:57:38,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-340-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9454/12318 [16:22:07<4:57:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9454/12318 [16:22:07<4:57:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9455/12318 [16:22:15<4:57:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9455/12318 [16:22:15<4:57:25,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-345-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9456/12318 [16:22:22<4:57:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9456/12318 [16:22:22<4:57:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  33K Sep  2 06:17 shuffle-word-35-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9457/12318 [16:22:29<4:57:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9457/12318 [16:22:29<4:57:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-350-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9458/12318 [16:22:34<4:57:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9458/12318 [16:22:34<4:57:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9459/12318 [16:22:38<4:57:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9459/12318 [16:22:38<4:57:00,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-355-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9460/12318 [16:22:45<4:56:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9460/12318 [16:22:45<4:56:54,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-360-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9461/12318 [16:22:48<4:56:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9461/12318 [16:22:48<4:56:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9462/12318 [16:22:52<4:56:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9462/12318 [16:22:52<4:56:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-365-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9463/12318 [16:22:55<4:56:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9463/12318 [16:22:55<4:56:32,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-370-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9464/12318 [16:22:59<4:56:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9464/12318 [16:22:59<4:56:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9465/12318 [16:23:01<4:56:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9465/12318 [16:23:01<4:56:18,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-375-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9466/12318 [16:23:10<4:56:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9466/12318 [16:23:10<4:56:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-380-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9467/12318 [16:23:19<4:56:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9467/12318 [16:23:19<4:56:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9468/12318 [16:23:25<4:56:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9468/12318 [16:23:25<4:56:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-385-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9469/12318 [16:23:26<4:55:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9469/12318 [16:23:26<4:55:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-390-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9470/12318 [16:23:29<4:55:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9470/12318 [16:23:29<4:55:46,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-3900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9471/12318 [16:23:31<4:55:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9471/12318 [16:23:31<4:55:38,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-395-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9472/12318 [16:23:56<4:55:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9472/12318 [16:23:56<4:55:38,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  32K Sep  2 06:17 shuffle-word-40-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9473/12318 [16:24:00<4:55:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9473/12318 [16:24:00<4:55:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9474/12318 [16:24:01<4:55:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9474/12318 [16:24:01<4:55:23,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9475/12318 [16:24:02<4:55:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9475/12318 [16:24:02<4:55:15,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-405-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9476/12318 [16:24:10<4:55:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-410-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9476/12318 [16:24:10<4:55:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9477/12318 [16:24:12<4:55:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9477/12318 [16:24:12<4:55:02,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-415-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9478/12318 [16:24:15<4:54:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-420-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9478/12318 [16:24:15<4:54:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9479/12318 [16:24:19<4:54:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9479/12318 [16:24:19<4:54:48,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-425-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9480/12318 [16:24:27<4:54:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9480/12318 [16:24:27<4:54:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-430-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9481/12318 [16:24:33<4:54:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9481/12318 [16:24:33<4:54:36,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9482/12318 [16:24:36<4:54:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9482/12318 [16:24:36<4:54:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-435-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9483/12318 [16:24:44<4:54:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9483/12318 [16:24:44<4:54:23,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-440-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9484/12318 [16:24:53<4:54:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9484/12318 [16:24:53<4:54:18,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9485/12318 [16:24:56<4:54:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9485/12318 [16:24:56<4:54:11,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-445-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9486/12318 [16:24:57<4:54:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9486/12318 [16:24:57<4:54:03,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  33K Sep  2 06:17 shuffle-word-45-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9487/12318 [16:25:06<4:53:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9487/12318 [16:25:06<4:53:57,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-450-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9488/12318 [16:25:15<4:53:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9488/12318 [16:25:15<4:53:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9489/12318 [16:25:23<4:53:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9489/12318 [16:25:23<4:53:46,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-455-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9490/12318 [16:25:32<4:53:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9490/12318 [16:25:32<4:53:41,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-460-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9491/12318 [16:25:36<4:53:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9491/12318 [16:25:36<4:53:34,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-4600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9492/12318 [16:25:43<4:53:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9492/12318 [16:25:43<4:53:28,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-465-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9493/12318 [16:25:46<4:53:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9493/12318 [16:25:46<4:53:21,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-470-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9494/12318 [16:25:49<4:53:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9494/12318 [16:25:49<4:53:14,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9495/12318 [16:25:52<4:53:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9495/12318 [16:25:52<4:53:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-475-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9496/12318 [16:25:57<4:53:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9496/12318 [16:25:57<4:53:00,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-480-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9497/12318 [16:26:06<4:52:54,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9497/12318 [16:26:06<4:52:54,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-485-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9498/12318 [16:26:13<4:52:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9498/12318 [16:26:13<4:52:48,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-490-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9499/12318 [16:26:22<4:52:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9499/12318 [16:26:22<4:52:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-4900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9500/12318 [16:26:31<4:52:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9500/12318 [16:26:31<4:52:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-495-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9501/12318 [16:26:35<4:52:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9501/12318 [16:26:35<4:52:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  85K Sep  2 06:17 shuffle-word-5-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9502/12318 [16:26:39<4:52:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9502/12318 [16:26:39<4:52:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  32K Sep  2 06:17 shuffle-word-50-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9503/12318 [16:26:46<4:52:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9503/12318 [16:26:46<4:52:18,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9504/12318 [16:27:09<4:52:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9504/12318 [16:27:09<4:52:17,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-5000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9505/12318 [16:27:15<4:52:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9505/12318 [16:27:15<4:52:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-505-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9506/12318 [16:27:20<4:52:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9506/12318 [16:27:20<4:52:04,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-510-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9507/12318 [16:27:27<4:51:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9507/12318 [16:27:27<4:51:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9508/12318 [16:27:33<4:51:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9508/12318 [16:27:33<4:51:51,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-515-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9509/12318 [16:27:34<4:51:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9509/12318 [16:27:34<4:51:43,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-520-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9510/12318 [16:27:41<4:51:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9510/12318 [16:27:41<4:51:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9511/12318 [16:27:47<4:51:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9511/12318 [16:27:47<4:51:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-525-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9512/12318 [16:27:51<4:51:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9512/12318 [16:27:51<4:51:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-530-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9513/12318 [16:27:55<4:51:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9513/12318 [16:27:55<4:51:18,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9514/12318 [16:28:00<4:51:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9514/12318 [16:28:00<4:51:11,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-535-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9515/12318 [16:28:05<4:51:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9515/12318 [16:28:05<4:51:04,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-540-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9516/12318 [16:28:13<4:50:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9516/12318 [16:28:13<4:50:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-5400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9517/12318 [16:28:21<4:50:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9517/12318 [16:28:21<4:50:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-545-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9518/12318 [16:28:22<4:50:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9518/12318 [16:28:22<4:50:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  31K Sep  2 06:17 shuffle-word-55-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9519/12318 [16:28:27<4:50:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9519/12318 [16:28:27<4:50:38,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-550-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9520/12318 [16:28:32<4:50:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9520/12318 [16:28:32<4:50:32,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-5500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9521/12318 [16:28:39<4:50:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9521/12318 [16:28:39<4:50:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-555-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9522/12318 [16:28:44<4:50:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9522/12318 [16:28:44<4:50:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-560-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9523/12318 [16:28:48<4:50:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9523/12318 [16:28:48<4:50:12,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9524/12318 [16:28:57<4:50:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9524/12318 [16:28:57<4:50:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-565-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9525/12318 [16:29:04<4:50:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9525/12318 [16:29:04<4:50:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-570-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9526/12318 [16:29:11<4:49:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9526/12318 [16:29:11<4:49:55,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9527/12318 [16:29:12<4:49:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9527/12318 [16:29:12<4:49:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-575-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9528/12318 [16:29:16<4:49:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9528/12318 [16:29:16<4:49:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-580-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9529/12318 [16:29:25<4:49:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9529/12318 [16:29:25<4:49:35,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9530/12318 [16:29:29<4:49:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9530/12318 [16:29:29<4:49:28,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-585-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9531/12318 [16:29:33<4:49:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9531/12318 [16:29:33<4:49:21,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-590-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9532/12318 [16:29:34<4:49:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9532/12318 [16:29:34<4:49:14,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-5900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9533/12318 [16:29:43<4:49:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9533/12318 [16:29:43<4:49:08,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-595-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9534/12318 [16:29:48<4:49:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9534/12318 [16:29:48<4:49:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  30K Sep  2 06:17 shuffle-word-60-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9535/12318 [16:29:57<4:48:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9535/12318 [16:29:57<4:48:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9536/12318 [16:30:32<4:48:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9536/12318 [16:30:32<4:48:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9537/12318 [16:30:40<4:48:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9537/12318 [16:30:40<4:48:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-605-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9538/12318 [16:30:43<4:48:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9538/12318 [16:30:43<4:48:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-610-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9539/12318 [16:30:48<4:48:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9539/12318 [16:30:48<4:48:39,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9540/12318 [16:30:52<4:48:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9540/12318 [16:30:52<4:48:32,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-615-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9541/12318 [16:31:01<4:48:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9541/12318 [16:31:01<4:48:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-620-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9542/12318 [16:31:06<4:48:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9542/12318 [16:31:06<4:48:20,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9543/12318 [16:31:11<4:48:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9543/12318 [16:31:11<4:48:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-625-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9544/12318 [16:31:16<4:48:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9544/12318 [16:31:16<4:48:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-630-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9545/12318 [16:31:23<4:48:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9545/12318 [16:31:23<4:48:00,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  77%|▊| 9546/12318 [16:31:25<4:47:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  77%|▊| 9546/12318 [16:31:25<4:47:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-635-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9547/12318 [16:31:32<4:47:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9547/12318 [16:31:32<4:47:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-640-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9548/12318 [16:31:39<4:47:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9548/12318 [16:31:39<4:47:41,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9549/12318 [16:31:44<4:47:35,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-645-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9549/12318 [16:31:44<4:47:35,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  31K Sep  2 06:17 shuffle-word-65-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9550/12318 [16:31:53<4:47:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9550/12318 [16:31:53<4:47:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-650-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9551/12318 [16:31:55<4:47:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9551/12318 [16:31:55<4:47:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9552/12318 [16:32:03<4:47:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9552/12318 [16:32:03<4:47:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-655-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9553/12318 [16:32:10<4:47:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9553/12318 [16:32:10<4:47:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-660-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9554/12318 [16:32:15<4:47:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9554/12318 [16:32:15<4:47:03,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-6600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9555/12318 [16:32:24<4:46:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9555/12318 [16:32:24<4:46:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-665-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9556/12318 [16:32:29<4:46:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9556/12318 [16:32:29<4:46:51,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-670-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9557/12318 [16:32:33<4:46:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9557/12318 [16:32:33<4:46:44,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-6700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9558/12318 [16:32:42<4:46:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9558/12318 [16:32:42<4:46:39,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-675-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9559/12318 [16:32:49<4:46:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9559/12318 [16:32:49<4:46:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-680-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9560/12318 [16:32:51<4:46:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9560/12318 [16:32:51<4:46:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9561/12318 [16:32:56<4:46:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9561/12318 [16:32:56<4:46:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-685-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9562/12318 [16:33:02<4:46:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9562/12318 [16:33:02<4:46:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-690-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9563/12318 [16:33:10<4:46:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-6900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9563/12318 [16:33:10<4:46:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-695-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9564/12318 [16:33:18<4:46:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9564/12318 [16:33:18<4:46:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  30K Sep  2 06:17 shuffle-word-70-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9565/12318 [16:33:19<4:45:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9565/12318 [16:33:19<4:45:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9566/12318 [16:33:28<4:45:48,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-7000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9566/12318 [16:33:28<4:45:48,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-705-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9567/12318 [16:33:31<4:45:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9567/12318 [16:33:31<4:45:41,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-710-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9568/12318 [16:34:03<4:45:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9568/12318 [16:34:03<4:45:42,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7100-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9569/12318 [16:34:08<4:45:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9569/12318 [16:34:08<4:45:36,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-715-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9570/12318 [16:34:10<4:45:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9570/12318 [16:34:10<4:45:28,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-720-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9571/12318 [16:34:18<4:45:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9571/12318 [16:34:18<4:45:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7200-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9572/12318 [16:34:25<4:45:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9572/12318 [16:34:25<4:45:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-725-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9573/12318 [16:34:27<4:45:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9573/12318 [16:34:27<4:45:09,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-730-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9574/12318 [16:34:29<4:45:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9574/12318 [16:34:29<4:45:01,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7300-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9575/12318 [16:34:31<4:44:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9575/12318 [16:34:31<4:44:54,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-735-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9576/12318 [16:34:34<4:44:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9576/12318 [16:34:34<4:44:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-740-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9577/12318 [16:34:41<4:44:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9577/12318 [16:34:41<4:44:41,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 508K Sep  2 06:17 shuffle-word-7400-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9578/12318 [16:34:45<4:44:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9578/12318 [16:34:45<4:44:34,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-745-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9579/12318 [16:34:51<4:44:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9579/12318 [16:34:51<4:44:28,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-75-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9580/12318 [16:35:00<4:44:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9580/12318 [16:35:00<4:44:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-750-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9581/12318 [16:35:07<4:44:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9581/12318 [16:35:07<4:44:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7500-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9582/12318 [16:35:13<4:44:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9582/12318 [16:35:13<4:44:10,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-755-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9583/12318 [16:35:20<4:44:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9583/12318 [16:35:20<4:44:04,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-760-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9584/12318 [16:35:27<4:43:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9584/12318 [16:35:27<4:43:58,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7600-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9585/12318 [16:35:32<4:43:51,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-765-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9585/12318 [16:35:32<4:43:51,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-770-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9586/12318 [16:35:40<4:43:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9586/12318 [16:35:40<4:43:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7700-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9587/12318 [16:35:44<4:43:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9587/12318 [16:35:44<4:43:39,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-775-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9588/12318 [16:35:51<4:43:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9588/12318 [16:35:51<4:43:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-780-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9589/12318 [16:35:55<4:43:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9589/12318 [16:35:55<4:43:26,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9590/12318 [16:35:59<4:43:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9590/12318 [16:35:59<4:43:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-785-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9591/12318 [16:36:06<4:43:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9591/12318 [16:36:06<4:43:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-790-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9592/12318 [16:36:13<4:43:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9592/12318 [16:36:13<4:43:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-7900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9593/12318 [16:36:15<4:42:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9593/12318 [16:36:15<4:42:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-795-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9594/12318 [16:36:17<4:42:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9594/12318 [16:36:17<4:42:52,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-80-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9595/12318 [16:36:22<4:42:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9595/12318 [16:36:22<4:42:46,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-800-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9596/12318 [16:36:26<4:42:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9596/12318 [16:36:26<4:42:39,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 509K Sep  2 06:17 shuffle-word-8000-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9597/12318 [16:36:29<4:42:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9597/12318 [16:36:29<4:42:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-805-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9598/12318 [16:36:35<4:42:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9598/12318 [16:36:35<4:42:25,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-810-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9599/12318 [16:36:41<4:42:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9599/12318 [16:36:41<4:42:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-815-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9600/12318 [16:37:16<4:42:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9600/12318 [16:37:16<4:42:21,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-820-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9601/12318 [16:37:43<4:42:20,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9601/12318 [16:37:43<4:42:20,  6.24s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-825-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9602/12318 [16:37:48<4:42:14,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9602/12318 [16:37:48<4:42:14,  6.24s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-830-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9603/12318 [16:37:53<4:42:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9603/12318 [16:37:53<4:42:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-835-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9604/12318 [16:37:56<4:42:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9604/12318 [16:37:56<4:42:00,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-840-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9605/12318 [16:38:00<4:41:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9605/12318 [16:38:00<4:41:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-845-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9606/12318 [16:38:05<4:41:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9606/12318 [16:38:05<4:41:47,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-85-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9607/12318 [16:38:11<4:41:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9607/12318 [16:38:11<4:41:40,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  28K Sep  2 06:17 shuffle-word-850-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9608/12318 [16:38:19<4:41:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9608/12318 [16:38:19<4:41:34,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-855-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9609/12318 [16:38:27<4:41:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9609/12318 [16:38:27<4:41:29,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-860-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9610/12318 [16:38:31<4:41:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9610/12318 [16:38:31<4:41:22,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-865-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9611/12318 [16:38:37<4:41:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9611/12318 [16:38:37<4:41:16,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-870-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9612/12318 [16:38:42<4:41:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9612/12318 [16:38:42<4:41:09,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-875-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9613/12318 [16:38:48<4:41:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9613/12318 [16:38:48<4:41:03,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-880-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9614/12318 [16:38:54<4:40:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9614/12318 [16:38:54<4:40:56,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-885-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9615/12318 [16:38:59<4:40:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9615/12318 [16:38:59<4:40:50,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-890-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9616/12318 [16:39:08<4:40:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9616/12318 [16:39:08<4:40:44,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-895-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9617/12318 [16:39:11<4:40:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9617/12318 [16:39:11<4:40:37,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  29K Sep  2 06:17 shuffle-word-90-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9618/12318 [16:39:16<4:40:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9618/12318 [16:39:16<4:40:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-900-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9619/12318 [16:39:20<4:40:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9619/12318 [16:39:20<4:40:24,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-905-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9620/12318 [16:39:27<4:40:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9620/12318 [16:39:27<4:40:18,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-910-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9621/12318 [16:39:35<4:40:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9621/12318 [16:39:35<4:40:12,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-915-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9622/12318 [16:39:40<4:40:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9622/12318 [16:39:40<4:40:06,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-920-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9623/12318 [16:39:45<4:39:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9623/12318 [16:39:45<4:39:59,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-925-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9624/12318 [16:39:47<4:39:51,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-930-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9624/12318 [16:39:47<4:39:51,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-935-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9625/12318 [16:39:52<4:39:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9625/12318 [16:39:52<4:39:45,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-940-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9626/12318 [16:39:56<4:39:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9626/12318 [16:39:56<4:39:38,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-945-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9627/12318 [16:40:05<4:39:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9627/12318 [16:40:05<4:39:33,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-95-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9628/12318 [16:40:13<4:39:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9628/12318 [16:40:13<4:39:27,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-950-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9629/12318 [16:40:14<4:39:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9629/12318 [16:40:14<4:39:19,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-955-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9630/12318 [16:40:22<4:39:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9630/12318 [16:40:22<4:39:13,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-960-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9631/12318 [16:40:28<4:39:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9631/12318 [16:40:28<4:39:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-965-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9632/12318 [16:40:55<4:39:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9632/12318 [16:40:55<4:39:07,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-970-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9633/12318 [16:40:58<4:39:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9633/12318 [16:40:58<4:39:00,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-975-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9634/12318 [16:41:02<4:38:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-980-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9634/12318 [16:41:02<4:38:53,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-985-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9635/12318 [16:41:06<4:38:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9635/12318 [16:41:06<4:38:46,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  26K Sep  2 06:17 shuffle-word-990-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9636/12318 [16:41:07<4:38:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9636/12318 [16:41:07<4:38:38,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root  27K Sep  2 06:17 shuffle-word-995-count.jsonl\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9637/12318 [16:41:12<4:38:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9637/12318 [16:41:12<4:38:31,  6.23s/it, v_num=e4xv, train/loss"
+      "-rw-r--r-- 1 root root 6.0K Sep  2 06:17 word-2-count.jsonl\n"
      ]
+    }
+   ],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
+    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
+    "#\n",
+    "python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 50 &\n",
+    "for i in {5..1000..5} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 50 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 1 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 1100 - 8000 words dataset (in steps of 100)\n",
+    "# \n",
+    "for i in {1100..8000..100} \n",
+    "do\n",
+    "    python ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 2000 & \n",
+    "    python ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -lh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "2eb840b5",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-02T06:17:49.255288Z",
+     "iopub.status.busy": "2023-09-02T06:17:49.254615Z"
+    },
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": false,
+     "start_time": "2023-09-02T06:17:49.184772",
+     "status": "running"
     },
+    "tags": []
+   },
+   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9638/12318 [16:41:19<4:38:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9638/12318 [16:41:19<4:38:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9639/12318 [16:41:26<4:38:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9639/12318 [16:41:26<4:38:19,  6.23s/it, v_num=e4xv, train/loss"
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9640/12318 [16:41:33<4:38:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9640/12318 [16:41:33<4:38:14,  6.23s/it, v_num=e4xv, train/loss"
+      "/usr/local/lib/python3.11/dist-packages/lightning/pytorch/cli.py:484: UserWarning: LightningCLI's args parameter is intended to run from within Python like if it were from the command line. To prevent mistakes it is not recommended to provide both args and command line arguments, got: sys.argv[1:]=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-L96-D1024-E0_1-mem-ctx-4k.pth'], args=['fit', '-c', '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/config-mem-template.yaml', '--trainer.logger.init_args.name=v5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)', '--trainer.strategy=deepspeed_stage_1', '--trainer.devices=auto', '--trainer.callbacks.init_args.dirpath=../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/', '--model.lr_init=3e-4', '--model.lr_final=1e-4', '--data.max_token_size=8192', '--model.ctx_len=4096', '--model.bptt_learning_range=2', '--model.load_model=../model/v5-L96-D1024-E0_1-mem-ctx-4k.pth'].\r\n",
+      "  rank_zero_warn(\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9641/12318 [16:41:40<4:38:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9641/12318 [16:41:40<4:38:07,  6.23s/it, v_num=e4xv, train/loss"
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/utilities/seed.py:39: UserWarning: No seed found, seed set to 4016710040\r\n",
+      "  rank_zero_warn(f\"No seed found, seed set to {seed}\")\r\n",
+      "Global seed set to 4016710040\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9642/12318 [16:41:45<4:38:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9642/12318 [16:41:45<4:38:01,  6.23s/it, v_num=e4xv, train/loss"
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mpicocreator\u001b[0m (\u001b[33mrwkv-x-dev\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9643/12318 [16:41:47<4:37:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9643/12318 [16:41:47<4:37:54,  6.23s/it, v_num=e4xv, train/loss"
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.15.9\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m./wandb/run-20230902_061755-f2kzi2o7\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mv5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments\u001b[0m\r\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/f2kzi2o7\u001b[0m\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9644/12318 [16:41:55<4:37:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9644/12318 [16:41:55<4:37:48,  6.23s/it, v_num=e4xv, train/loss"
+      "/usr/local/lib/python3.11/dist-packages/lightning/fabric/connector.py:562: UserWarning: bf16 is supported for historical reasons but its usage is discouraged. Please set your precision to bf16-mixed instead!\r\n",
+      "  rank_zero_warn(\r\n",
+      "GPU available: True (cuda), used: True\r\n",
+      "TPU available: False, using: 0 TPU cores\r\n",
+      "IPU available: False, using: 0 IPUs\r\n",
+      "HPU available: False, using: 0 HPUs\r\n",
+      "\r\n",
+      "\r\n",
+      "[RWKV.Trainer] Applying 'target_batch_size' with the following:\r\n",
+      "   - target_batch_size:       256\r\n",
+      "   - num_nodes:               1\r\n",
+      "   - num_devices:             8\r\n",
+      "   - accumulate_grad_batches: 32\r\n",
+      "   - effective_batch_size:    256\r\n",
+      "\r\n"
      ]
     },
     {
@@ -112807,17 +10645,20 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9645/12318 [16:41:59<4:37:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9645/12318 [16:41:59<4:37:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Resolving data files:   0%|                             | 0/541 [00:00<?, ?it/s]\r",
+      "Resolving data files: 100%|███████████████| 541/541 [00:00<00:00, 219217.32it/s]\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Downloading and preparing dataset json/default to /actions-runner/.cache/huggingface/datasets/json/default-5b99ac9e8f2083f3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96...\r\n",
+      "\r",
+      "Downloading data files:   0%|                             | 0/1 [00:00<?, ?it/s]\r",
+      "Downloading data files: 100%|████████████████████| 1/1 [00:00<00:00, 144.05it/s]\r\n",
       "\r",
-      "Epoch 0:  78%|▊| 9646/12318 [16:42:06<4:37:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9646/12318 [16:42:06<4:37:35,  6.23s/it, v_num=e4xv, train/loss"
+      "Extracting data files:   0%|                              | 0/1 [00:00<?, ?it/s]"
      ]
     },
     {
@@ -112825,134 +10666,115 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9647/12318 [16:42:11<4:37:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9647/12318 [16:42:11<4:37:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00,  6.11it/s]\r",
+      "Extracting data files: 100%|██████████████████████| 1/1 [00:00<00:00,  6.09it/s]\r\n",
+      "\r",
+      "Generating train split: 0 examples [00:00, ? examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9648/12318 [16:42:14<4:37:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9648/12318 [16:42:14<4:37:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9649/12318 [16:42:18<4:37:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9649/12318 [16:42:18<4:37:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Setting ds_accelerator to cuda (auto detect)\r\n",
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9650/12318 [16:42:22<4:37:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9650/12318 [16:42:22<4:37:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Setting ds_accelerator to cuda (auto detect)\r\n",
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9651/12318 [16:42:31<4:37:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9651/12318 [16:42:31<4:37:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9652/12318 [16:42:36<4:36:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9652/12318 [16:42:36<4:36:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Setting ds_accelerator to cuda (auto detect)\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9653/12318 [16:42:45<4:36:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9653/12318 [16:42:45<4:36:50,  6.23s/it, v_num=e4xv, train/loss"
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9654/12318 [16:42:46<4:36:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9654/12318 [16:42:46<4:36:42,  6.23s/it, v_num=e4xv, train/loss"
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9655/12318 [16:42:53<4:36:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9655/12318 [16:42:53<4:36:36,  6.23s/it, v_num=e4xv, train/loss"
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n",
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9656/12318 [16:42:59<4:36:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9656/12318 [16:42:59<4:36:30,  6.23s/it, v_num=e4xv, train/loss"
+      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9657/12318 [16:43:05<4:36:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9657/12318 [16:43:05<4:36:24,  6.23s/it, v_num=e4xv, train/loss"
+      "[rank: 1] Global seed set to 4016710040\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9658/12318 [16:43:09<4:36:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9658/12318 [16:43:09<4:36:17,  6.23s/it, v_num=e4xv, train/loss"
+      "[rank: 3] Global seed set to 4016710040\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9659/12318 [16:43:14<4:36:10,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9659/12318 [16:43:14<4:36:10,  6.23s/it, v_num=e4xv, train/loss"
+      "[rank: 4] Global seed set to 4016710040\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9660/12318 [16:43:21<4:36:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9660/12318 [16:43:21<4:36:04,  6.23s/it, v_num=e4xv, train/loss"
+      "[rank: 6] Global seed set to 4016710040\r\n",
+      "[rank: 7] Global seed set to 4016710040\r\n",
+      "[rank: 2] Global seed set to 4016710040\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  78%|▊| 9661/12318 [16:43:30<4:35:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9661/12318 [16:43:30<4:35:59,  6.23s/it, v_num=e4xv, train/loss"
+      "[rank: 5] Global seed set to 4016710040\r\n"
      ]
     },
     {
@@ -112960,8 +10782,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9662/12318 [16:43:32<4:35:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9662/12318 [16:43:32<4:35:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 1196 examples [00:10, 111.12 examples/s]"
      ]
     },
     {
@@ -112969,8 +10790,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9663/12318 [16:43:35<4:35:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9663/12318 [16:43:35<4:35:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 10026 examples [00:10, 1263.89 examples/s]"
      ]
     },
     {
@@ -112978,8 +10798,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9664/12318 [16:44:10<4:35:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9664/12318 [16:44:10<4:35:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 21237 examples [00:11, 3282.25 examples/s]"
      ]
     },
     {
@@ -112987,8 +10806,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9665/12318 [16:44:12<4:35:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9665/12318 [16:44:12<4:35:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 30015 examples [00:11, 5412.46 examples/s]"
      ]
     },
     {
@@ -112996,8 +10814,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9666/12318 [16:44:20<4:35:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9666/12318 [16:44:20<4:35:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 38572 examples [00:11, 8118.18 examples/s]"
      ]
     },
     {
@@ -113005,8 +10822,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9667/12318 [16:44:24<4:35:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9667/12318 [16:44:24<4:35:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 46262 examples [00:11, 11051.07 examples/s]"
      ]
     },
     {
@@ -113014,8 +10830,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9668/12318 [16:44:28<4:35:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9668/12318 [16:44:28<4:35:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 52932 examples [00:11, 14163.28 examples/s]"
      ]
     },
     {
@@ -113023,8 +10838,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  78%|▊| 9669/12318 [16:44:33<4:35:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  78%|▊| 9669/12318 [16:44:33<4:35:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 59424 examples [00:11, 17679.28 examples/s]"
      ]
     },
     {
@@ -113032,8 +10846,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9670/12318 [16:44:35<4:35:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9670/12318 [16:44:35<4:35:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 66940 examples [00:11, 22906.06 examples/s]"
      ]
     },
     {
@@ -113041,8 +10854,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9671/12318 [16:44:41<4:34:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9671/12318 [16:44:41<4:34:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 72972 examples [00:11, 24826.65 examples/s]"
      ]
     },
     {
@@ -113050,8 +10862,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9672/12318 [16:44:44<4:34:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9672/12318 [16:44:44<4:34:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 78377 examples [00:12, 27244.59 examples/s]"
      ]
     },
     {
@@ -113059,8 +10870,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9673/12318 [16:44:47<4:34:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9673/12318 [16:44:47<4:34:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 83241 examples [00:12, 27947.59 examples/s]"
      ]
     },
     {
@@ -113068,8 +10878,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9674/12318 [16:44:49<4:34:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9674/12318 [16:44:49<4:34:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 87440 examples [00:12, 27873.38 examples/s]"
      ]
     },
     {
@@ -113077,8 +10886,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9675/12318 [16:44:56<4:34:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9675/12318 [16:44:56<4:34:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 91429 examples [00:12, 26415.39 examples/s]"
      ]
     },
     {
@@ -113086,8 +10894,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9676/12318 [16:45:01<4:34:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9676/12318 [16:45:01<4:34:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 94971 examples [00:12, 27319.61 examples/s]"
      ]
     },
     {
@@ -113095,8 +10902,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9677/12318 [16:45:06<4:34:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9677/12318 [16:45:06<4:34:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 98304 examples [00:12, 28128.14 examples/s]"
      ]
     },
     {
@@ -113104,8 +10910,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9678/12318 [16:45:11<4:34:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9678/12318 [16:45:11<4:34:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 101778 examples [00:12, 26589.06 examples/s]"
      ]
     },
     {
@@ -113113,8 +10918,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9679/12318 [16:45:19<4:34:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9679/12318 [16:45:19<4:34:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 104707 examples [00:13, 26921.37 examples/s]"
      ]
     },
     {
@@ -113122,7 +10926,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9680/12318 [16:45:24<4:33:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 108050 examples [00:13, 28043.81 examples/s]"
      ]
     },
     {
@@ -113130,7 +10934,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9680/12318 [16:45:24<4:33:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Generating train split: 146873 examples [00:13, 118860.78 examples/s]"
      ]
     },
     {
@@ -113138,17 +10942,16 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9681/12318 [16:45:33<4:33:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9681/12318 [16:45:33<4:33:54,  6.23s/it, v_num=e4xv, train/loss"
+      "                                                                     \r"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Dataset json downloaded and prepared to /actions-runner/.cache/huggingface/datasets/json/default-5b99ac9e8f2083f3/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96. Subsequent calls will reuse this data.\r\n",
       "\r",
-      "Epoch 0:  79%|▊| 9682/12318 [16:45:38<4:33:47,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9682/12318 [16:45:38<4:33:47,  6.23s/it, v_num=e4xv, train/loss"
+      "  0%|                                                     | 0/1 [00:00<?, ?it/s]"
      ]
     },
     {
@@ -113156,8 +10959,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9683/12318 [16:45:45<4:33:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9683/12318 [16:45:45<4:33:41,  6.23s/it, v_num=e4xv, train/loss"
+      "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 24.72it/s]\r\n"
      ]
     },
     {
@@ -113165,8 +10967,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9684/12318 [16:45:47<4:33:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9684/12318 [16:45:47<4:33:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|                      | 0/154911 [00:00<?, ? examples/s]"
      ]
     },
     {
@@ -113174,8 +10975,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9685/12318 [16:45:53<4:33:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9685/12318 [16:45:53<4:33:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 10/154911 [00:01<7:49:21,  5.50 examples/s]"
      ]
     },
     {
@@ -113183,7 +10983,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9686/12318 [16:45:58<4:33:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|            | 100/154911 [00:02<43:28, 59.35 examples/s]"
      ]
     },
     {
@@ -113191,7 +10991,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9686/12318 [16:45:58<4:33:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|            | 118/154911 [00:02<37:56, 68.00 examples/s]"
      ]
     },
     {
@@ -113199,8 +10999,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9687/12318 [16:46:06<4:33:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9687/12318 [16:46:06<4:33:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|            | 153/154911 [00:02<28:05, 91.81 examples/s]"
      ]
     },
     {
@@ -113208,8 +11007,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9688/12318 [16:46:14<4:33:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9688/12318 [16:46:14<4:33:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|            | 174/154911 [00:02<26:58, 95.59 examples/s]"
      ]
     },
     {
@@ -113217,8 +11015,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9689/12318 [16:46:21<4:33:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9689/12318 [16:46:21<4:33:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|            | 191/154911 [00:02<27:39, 93.23 examples/s]"
      ]
     },
     {
@@ -113226,8 +11023,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9690/12318 [16:46:27<4:32:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9690/12318 [16:46:27<4:32:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 322/154911 [00:03<10:33, 243.84 examples/s]"
      ]
     },
     {
@@ -113235,8 +11031,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9691/12318 [16:46:31<4:32:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9691/12318 [16:46:31<4:32:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 430/154911 [00:03<07:23, 348.14 examples/s]"
      ]
     },
     {
@@ -113244,8 +11039,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9692/12318 [16:46:33<4:32:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9692/12318 [16:46:33<4:32:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 476/154911 [00:03<08:57, 287.56 examples/s]"
      ]
     },
     {
@@ -113253,7 +11047,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9693/12318 [16:46:39<4:32:36,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 517/154911 [00:03<10:29, 245.15 examples/s]"
      ]
     },
     {
@@ -113261,7 +11055,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9693/12318 [16:46:39<4:32:36,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 572/154911 [00:03<08:49, 291.28 examples/s]"
      ]
     },
     {
@@ -113269,8 +11063,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9694/12318 [16:46:47<4:32:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9694/12318 [16:46:47<4:32:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 610/154911 [00:04<10:08, 253.73 examples/s]"
      ]
     },
     {
@@ -113278,8 +11071,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9695/12318 [16:46:56<4:32:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9695/12318 [16:46:56<4:32:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 642/154911 [00:04<12:53, 199.45 examples/s]"
      ]
     },
     {
@@ -113287,8 +11079,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9696/12318 [16:47:28<4:32:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9696/12318 [16:47:28<4:32:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 700/154911 [00:04<11:24, 225.35 examples/s]"
      ]
     },
     {
@@ -113296,8 +11087,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9697/12318 [16:47:37<4:32:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9697/12318 [16:47:37<4:32:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   0%|           | 742/154911 [00:04<11:33, 222.26 examples/s]"
      ]
     },
     {
@@ -113305,8 +11095,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9698/12318 [16:47:44<4:32:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9698/12318 [16:47:44<4:32:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|           | 788/154911 [00:04<09:49, 261.65 examples/s]"
      ]
     },
     {
@@ -113314,8 +11103,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9699/12318 [16:47:46<4:32:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9699/12318 [16:47:46<4:32:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|           | 830/154911 [00:04<09:27, 271.47 examples/s]"
      ]
     },
     {
@@ -113323,8 +11111,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9700/12318 [16:47:55<4:32:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9700/12318 [16:47:55<4:32:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|           | 862/154911 [00:05<11:52, 216.15 examples/s]"
      ]
     },
     {
@@ -113332,8 +11119,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9701/12318 [16:47:57<4:31:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9701/12318 [16:47:57<4:31:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|           | 945/154911 [00:05<07:58, 321.97 examples/s]"
      ]
     },
     {
@@ -113341,8 +11127,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9702/12318 [16:48:06<4:31:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9702/12318 [16:48:06<4:31:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|           | 986/154911 [00:05<09:29, 270.36 examples/s]"
      ]
     },
     {
@@ -113350,7 +11135,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9703/12318 [16:48:14<4:31:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|          | 1193/154911 [00:05<06:29, 394.66 examples/s]"
      ]
     },
     {
@@ -113358,7 +11143,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9703/12318 [16:48:14<4:31:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|          | 1409/154911 [00:06<03:56, 647.89 examples/s]"
      ]
     },
     {
@@ -113366,8 +11151,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9704/12318 [16:48:19<4:31:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9704/12318 [16:48:19<4:31:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|          | 1497/154911 [00:06<04:48, 531.08 examples/s]"
      ]
     },
     {
@@ -113375,8 +11159,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9705/12318 [16:48:24<4:31:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9705/12318 [16:48:24<4:31:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|          | 1568/154911 [00:06<05:41, 448.40 examples/s]"
      ]
     },
     {
@@ -113384,8 +11167,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9706/12318 [16:48:30<4:31:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9706/12318 [16:48:30<4:31:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|          | 1634/154911 [00:06<05:34, 458.24 examples/s]"
      ]
     },
     {
@@ -113393,8 +11175,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9707/12318 [16:48:34<4:31:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9707/12318 [16:48:34<4:31:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|          | 1693/154911 [00:06<05:33, 458.93 examples/s]"
      ]
     },
     {
@@ -113402,8 +11183,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9708/12318 [16:48:41<4:31:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9708/12318 [16:48:41<4:31:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|          | 1796/154911 [00:07<06:13, 410.37 examples/s]"
      ]
     },
     {
@@ -113411,8 +11191,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9709/12318 [16:48:44<4:31:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9709/12318 [16:48:44<4:31:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|▏         | 2003/154911 [00:07<03:43, 684.27 examples/s]"
      ]
     },
     {
@@ -113420,8 +11199,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9710/12318 [16:48:46<4:30:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9710/12318 [16:48:46<4:30:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|▏         | 2100/154911 [00:07<04:43, 539.55 examples/s]"
      ]
     },
     {
@@ -113429,8 +11207,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9711/12318 [16:48:54<4:30:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9711/12318 [16:48:54<4:30:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   1%|▏         | 2241/154911 [00:07<05:07, 495.71 examples/s]"
      ]
     },
     {
@@ -113438,8 +11215,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9712/12318 [16:48:59<4:30:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9712/12318 [16:48:59<4:30:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 2345/154911 [00:08<06:44, 377.49 examples/s]"
      ]
     },
     {
@@ -113447,8 +11223,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9713/12318 [16:49:03<4:30:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9713/12318 [16:49:03<4:30:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 2515/154911 [00:08<05:47, 438.79 examples/s]"
      ]
     },
     {
@@ -113456,8 +11231,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9714/12318 [16:49:04<4:30:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9714/12318 [16:49:04<4:30:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 2636/154911 [00:08<04:48, 527.20 examples/s]"
      ]
     },
     {
@@ -113465,8 +11239,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9715/12318 [16:49:07<4:30:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9715/12318 [16:49:07<4:30:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 2708/154911 [00:09<06:10, 410.71 examples/s]"
      ]
     },
     {
@@ -113474,8 +11247,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9716/12318 [16:49:13<4:30:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9716/12318 [16:49:13<4:30:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 2818/154911 [00:09<05:06, 496.44 examples/s]"
      ]
     },
     {
@@ -113483,8 +11255,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9717/12318 [16:49:15<4:30:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9717/12318 [16:49:15<4:30:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 2888/154911 [00:09<07:00, 361.69 examples/s]"
      ]
     },
     {
@@ -113492,8 +11263,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9718/12318 [16:49:19<4:30:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9718/12318 [16:49:19<4:30:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 3048/154911 [00:09<06:47, 372.42 examples/s]"
      ]
     },
     {
@@ -113501,8 +11271,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9719/12318 [16:49:23<4:29:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9719/12318 [16:49:23<4:29:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 3098/154911 [00:10<07:07, 354.94 examples/s]"
      ]
     },
     {
@@ -113510,8 +11279,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9720/12318 [16:49:29<4:29:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9720/12318 [16:49:29<4:29:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 3151/154911 [00:10<07:47, 324.72 examples/s]"
      ]
     },
     {
@@ -113519,8 +11287,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9721/12318 [16:49:38<4:29:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9721/12318 [16:49:38<4:29:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 3341/154911 [00:10<06:20, 398.59 examples/s]"
      ]
     },
     {
@@ -113528,8 +11295,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9722/12318 [16:49:47<4:29:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9722/12318 [16:49:47<4:29:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 3435/154911 [00:10<06:49, 369.76 examples/s]"
      ]
     },
     {
@@ -113537,8 +11303,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9723/12318 [16:49:51<4:29:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9723/12318 [16:49:51<4:29:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 3474/154911 [00:11<08:46, 287.71 examples/s]"
      ]
     },
     {
@@ -113546,8 +11311,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9724/12318 [16:50:00<4:29:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9724/12318 [16:50:00<4:29:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 3564/154911 [00:11<07:32, 334.53 examples/s]"
      ]
     },
     {
@@ -113555,8 +11319,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9725/12318 [16:50:08<4:29:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9725/12318 [16:50:08<4:29:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   2%|▏         | 3770/154911 [00:11<06:35, 381.82 examples/s]"
      ]
     },
     {
@@ -113564,8 +11327,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9726/12318 [16:50:10<4:29:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9726/12318 [16:50:10<4:29:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 4110/154911 [00:12<03:28, 721.86 examples/s]"
      ]
     },
     {
@@ -113573,8 +11335,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9727/12318 [16:50:13<4:29:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9727/12318 [16:50:13<4:29:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 4227/154911 [00:12<04:13, 595.18 examples/s]"
      ]
     },
     {
@@ -113582,8 +11343,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9728/12318 [16:50:42<4:29:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9728/12318 [16:50:42<4:29:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 4327/154911 [00:12<05:01, 498.77 examples/s]"
      ]
     },
     {
@@ -113591,8 +11351,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9729/12318 [16:50:50<4:28:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9729/12318 [16:50:50<4:28:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 4660/154911 [00:12<03:18, 758.05 examples/s]"
      ]
     },
     {
@@ -113600,8 +11359,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9730/12318 [16:50:56<4:28:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9730/12318 [16:50:56<4:28:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 4761/154911 [00:13<03:33, 703.01 examples/s]"
      ]
     },
     {
@@ -113609,8 +11367,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9731/12318 [16:51:00<4:28:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9731/12318 [16:51:00<4:28:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 4848/154911 [00:13<04:59, 501.01 examples/s]"
      ]
     },
     {
@@ -113618,8 +11375,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9732/12318 [16:51:09<4:28:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9732/12318 [16:51:09<4:28:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 5090/154911 [00:13<03:21, 742.70 examples/s]"
      ]
     },
     {
@@ -113627,8 +11383,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9733/12318 [16:51:15<4:28:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9733/12318 [16:51:15<4:28:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 5207/154911 [00:13<04:03, 615.99 examples/s]"
      ]
     },
     {
@@ -113636,8 +11391,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9734/12318 [16:51:19<4:28:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9734/12318 [16:51:19<4:28:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 5302/154911 [00:14<04:09, 599.93 examples/s]"
      ]
     },
     {
@@ -113645,8 +11399,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9735/12318 [16:51:25<4:28:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9735/12318 [16:51:25<4:28:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   3%|▎         | 5384/154911 [00:14<04:19, 575.66 examples/s]"
      ]
     },
     {
@@ -113654,8 +11407,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9736/12318 [16:51:28<4:28:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9736/12318 [16:51:28<4:28:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▎         | 5458/154911 [00:14<05:18, 469.89 examples/s]"
      ]
     },
     {
@@ -113663,8 +11415,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9737/12318 [16:51:30<4:28:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9737/12318 [16:51:30<4:28:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▎         | 5551/154911 [00:14<04:36, 539.22 examples/s]"
      ]
     },
     {
@@ -113672,8 +11423,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9738/12318 [16:51:37<4:28:01,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9738/12318 [16:51:37<4:28:01,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▎         | 5699/154911 [00:14<04:09, 596.92 examples/s]"
      ]
     },
     {
@@ -113681,8 +11431,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9739/12318 [16:51:44<4:27:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9739/12318 [16:51:44<4:27:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 5831/154911 [00:15<04:20, 572.90 examples/s]"
      ]
     },
     {
@@ -113690,8 +11439,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9740/12318 [16:51:48<4:27:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9740/12318 [16:51:48<4:27:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 5979/154911 [00:15<03:59, 621.94 examples/s]"
      ]
     },
     {
@@ -113699,8 +11447,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9741/12318 [16:51:57<4:27:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9741/12318 [16:51:57<4:27:42,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 6097/154911 [00:15<04:36, 538.34 examples/s]"
      ]
     },
     {
@@ -113708,8 +11455,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9742/12318 [16:52:05<4:27:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9742/12318 [16:52:05<4:27:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 6233/154911 [00:15<03:58, 622.67 examples/s]"
      ]
     },
     {
@@ -113717,8 +11463,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9743/12318 [16:52:06<4:27:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9743/12318 [16:52:06<4:27:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 6343/154911 [00:15<03:58, 623.28 examples/s]"
      ]
     },
     {
@@ -113726,8 +11471,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9744/12318 [16:52:11<4:27:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9744/12318 [16:52:11<4:27:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 6427/154911 [00:16<03:59, 621.07 examples/s]"
      ]
     },
     {
@@ -113735,8 +11479,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9745/12318 [16:52:17<4:27:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9745/12318 [16:52:17<4:27:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 6493/154911 [00:16<04:16, 578.32 examples/s]"
      ]
     },
     {
@@ -113744,8 +11487,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9746/12318 [16:52:19<4:27:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9746/12318 [16:52:19<4:27:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 6554/154911 [00:16<06:00, 411.21 examples/s]"
      ]
     },
     {
@@ -113753,8 +11495,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9747/12318 [16:52:24<4:27:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9747/12318 [16:52:24<4:27:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 6636/154911 [00:16<05:55, 417.09 examples/s]"
      ]
     },
     {
@@ -113762,8 +11503,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9748/12318 [16:52:29<4:26:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9748/12318 [16:52:29<4:26:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 6825/154911 [00:16<03:50, 641.54 examples/s]"
      ]
     },
     {
@@ -113771,8 +11511,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9749/12318 [16:52:35<4:26:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9749/12318 [16:52:35<4:26:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   4%|▍         | 6925/154911 [00:17<04:30, 547.03 examples/s]"
      ]
     },
     {
@@ -113780,8 +11519,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9750/12318 [16:52:44<4:26:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9750/12318 [16:52:44<4:26:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   5%|▍         | 7052/154911 [00:17<04:55, 501.11 examples/s]"
      ]
     },
     {
@@ -113789,8 +11527,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9751/12318 [16:52:49<4:26:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9751/12318 [16:52:49<4:26:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   5%|▍         | 7206/154911 [00:17<04:54, 501.89 examples/s]"
      ]
     },
     {
@@ -113798,8 +11535,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9752/12318 [16:52:53<4:26:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9752/12318 [16:52:53<4:26:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   5%|▍         | 7404/154911 [00:17<03:34, 686.35 examples/s]"
      ]
     },
     {
@@ -113807,8 +11543,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9753/12318 [16:53:01<4:26:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9753/12318 [16:53:01<4:26:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   5%|▍         | 7648/154911 [00:17<02:32, 964.56 examples/s]"
      ]
     },
     {
@@ -113816,8 +11551,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9754/12318 [16:53:05<4:26:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9754/12318 [16:53:05<4:26:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   5%|▍        | 7808/154911 [00:18<02:19, 1051.88 examples/s]"
      ]
     },
     {
@@ -113825,8 +11559,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9755/12318 [16:53:08<4:26:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9755/12318 [16:53:08<4:26:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   5%|▍        | 7959/154911 [00:18<02:10, 1127.60 examples/s]"
      ]
     },
     {
@@ -113834,8 +11567,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9756/12318 [16:53:16<4:26:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9756/12318 [16:53:16<4:26:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   5%|▍        | 8160/154911 [00:18<01:51, 1315.33 examples/s]"
      ]
     },
     {
@@ -113843,8 +11575,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9757/12318 [16:53:24<4:25:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9757/12318 [16:53:24<4:25:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   5%|▍        | 8320/154911 [00:18<02:05, 1167.72 examples/s]"
      ]
     },
     {
@@ -113852,8 +11583,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9758/12318 [16:53:28<4:25:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9758/12318 [16:53:28<4:25:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   5%|▍        | 8454/154911 [00:18<02:26, 1001.75 examples/s]"
      ]
     },
     {
@@ -113861,8 +11591,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9759/12318 [16:53:32<4:25:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9759/12318 [16:53:32<4:25:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 8569/154911 [00:18<02:56, 826.88 examples/s]"
      ]
     },
     {
@@ -113870,8 +11599,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9760/12318 [16:54:00<4:25:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9760/12318 [16:54:00<4:25:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 8665/154911 [00:18<03:08, 776.66 examples/s]"
      ]
     },
     {
@@ -113879,7 +11607,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9761/12318 [16:54:08<4:25:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 8754/154911 [00:19<03:16, 743.55 examples/s]"
      ]
     },
     {
@@ -113887,7 +11615,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9761/12318 [16:54:08<4:25:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 8834/154911 [00:19<03:22, 722.59 examples/s]"
      ]
     },
     {
@@ -113895,8 +11623,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9762/12318 [16:54:13<4:25:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9762/12318 [16:54:13<4:25:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 8910/154911 [00:19<03:26, 708.59 examples/s]"
      ]
     },
     {
@@ -113904,8 +11631,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9763/12318 [16:54:20<4:25:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9763/12318 [16:54:20<4:25:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 8987/154911 [00:19<03:23, 716.95 examples/s]"
      ]
     },
     {
@@ -113913,8 +11639,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9764/12318 [16:54:25<4:25:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9764/12318 [16:54:25<4:25:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9061/154911 [00:19<03:37, 669.95 examples/s]"
      ]
     },
     {
@@ -113922,8 +11647,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9765/12318 [16:54:30<4:25:14,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9765/12318 [16:54:30<4:25:14,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9130/154911 [00:19<03:57, 613.97 examples/s]"
      ]
     },
     {
@@ -113931,8 +11655,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9766/12318 [16:54:34<4:25:07,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9766/12318 [16:54:34<4:25:07,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9193/154911 [00:19<04:06, 592.13 examples/s]"
      ]
     },
     {
@@ -113940,8 +11663,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9767/12318 [16:54:35<4:24:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9767/12318 [16:54:35<4:24:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9253/154911 [00:19<04:29, 540.38 examples/s]"
      ]
     },
     {
@@ -113949,7 +11671,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9768/12318 [16:54:41<4:24:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9308/154911 [00:20<04:41, 518.04 examples/s]"
      ]
     },
     {
@@ -113957,7 +11679,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9768/12318 [16:54:41<4:24:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9392/154911 [00:20<04:03, 596.65 examples/s]"
      ]
     },
     {
@@ -113965,8 +11687,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9769/12318 [16:54:42<4:24:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9769/12318 [16:54:42<4:24:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9454/154911 [00:20<04:05, 593.58 examples/s]"
      ]
     },
     {
@@ -113974,8 +11695,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9770/12318 [16:54:50<4:24:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9770/12318 [16:54:50<4:24:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9517/154911 [00:20<04:02, 600.74 examples/s]"
      ]
     },
     {
@@ -113983,8 +11703,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9771/12318 [16:54:55<4:24:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9771/12318 [16:54:55<4:24:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9578/154911 [00:20<04:06, 590.18 examples/s]"
      ]
     },
     {
@@ -113992,8 +11711,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9772/12318 [16:54:59<4:24:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9772/12318 [16:54:59<4:24:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌         | 9638/154911 [00:20<04:20, 557.13 examples/s]"
      ]
     },
     {
@@ -114001,8 +11719,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9773/12318 [16:55:07<4:24:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9773/12318 [16:55:07<4:24:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▋         | 9695/154911 [00:20<04:26, 544.75 examples/s]"
      ]
     },
     {
@@ -114010,8 +11727,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9774/12318 [16:55:16<4:24:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9774/12318 [16:55:16<4:24:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▋         | 9751/154911 [00:20<04:32, 533.35 examples/s]"
      ]
     },
     {
@@ -114019,8 +11735,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9775/12318 [16:55:21<4:24:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9775/12318 [16:55:21<4:24:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▋         | 9809/154911 [00:20<04:25, 545.77 examples/s]"
      ]
     },
     {
@@ -114028,8 +11743,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9776/12318 [16:55:26<4:24:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9776/12318 [16:55:26<4:24:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▋         | 9869/154911 [00:21<04:18, 560.76 examples/s]"
      ]
     },
     {
@@ -114037,8 +11751,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9777/12318 [16:55:31<4:23:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9777/12318 [16:55:31<4:23:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▋         | 9944/154911 [00:21<04:19, 558.23 examples/s]"
      ]
     },
     {
@@ -114046,8 +11759,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9778/12318 [16:55:37<4:23:49,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9778/12318 [16:55:37<4:23:49,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   6%|▌        | 10069/154911 [00:21<03:16, 737.43 examples/s]"
      ]
     },
     {
@@ -114055,8 +11767,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9779/12318 [16:55:43<4:23:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9779/12318 [16:55:43<4:23:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▌        | 10145/154911 [00:21<03:18, 729.49 examples/s]"
      ]
     },
     {
@@ -114064,8 +11775,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9780/12318 [16:55:45<4:23:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9780/12318 [16:55:45<4:23:35,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▌        | 10219/154911 [00:21<03:20, 722.83 examples/s]"
      ]
     },
     {
@@ -114073,8 +11783,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9781/12318 [16:55:48<4:23:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9781/12318 [16:55:48<4:23:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▌        | 10296/154911 [00:21<03:26, 699.02 examples/s]"
      ]
     },
     {
@@ -114082,8 +11791,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9782/12318 [16:55:54<4:23:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9782/12318 [16:55:54<4:23:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▌        | 10367/154911 [00:21<03:32, 679.72 examples/s]"
      ]
     },
     {
@@ -114091,8 +11799,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9783/12318 [16:55:57<4:23:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9783/12318 [16:55:57<4:23:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▌        | 10438/154911 [00:21<03:31, 683.93 examples/s]"
      ]
     },
     {
@@ -114100,8 +11807,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9784/12318 [16:56:05<4:23:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9784/12318 [16:56:05<4:23:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▌        | 10513/154911 [00:21<03:26, 700.04 examples/s]"
      ]
     },
     {
@@ -114109,8 +11815,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9785/12318 [16:56:11<4:23:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9785/12318 [16:56:11<4:23:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▌        | 10594/154911 [00:22<03:17, 729.38 examples/s]"
      ]
     },
     {
@@ -114118,8 +11823,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9786/12318 [16:56:18<4:22:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9786/12318 [16:56:18<4:22:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▌        | 10677/154911 [00:22<03:10, 758.19 examples/s]"
      ]
     },
     {
@@ -114127,8 +11831,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9787/12318 [16:56:23<4:22:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9787/12318 [16:56:23<4:22:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▌        | 10754/154911 [00:22<03:16, 732.65 examples/s]"
      ]
     },
     {
@@ -114136,8 +11839,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9788/12318 [16:56:28<4:22:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9788/12318 [16:56:28<4:22:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 10828/154911 [00:22<03:28, 692.47 examples/s]"
      ]
     },
     {
@@ -114145,8 +11847,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9789/12318 [16:56:33<4:22:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9789/12318 [16:56:33<4:22:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 10898/154911 [00:22<03:37, 662.86 examples/s]"
      ]
     },
     {
@@ -114154,7 +11855,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9790/12318 [16:56:39<4:22:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 10989/154911 [00:22<03:27, 694.02 examples/s]"
      ]
     },
     {
@@ -114162,7 +11863,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9790/12318 [16:56:39<4:22:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 11061/154911 [00:22<03:25, 701.08 examples/s]"
      ]
     },
     {
@@ -114170,8 +11871,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9791/12318 [16:56:45<4:22:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9791/12318 [16:56:45<4:22:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 11136/154911 [00:22<03:22, 708.74 examples/s]"
      ]
     },
     {
@@ -114179,8 +11879,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  79%|▊| 9792/12318 [16:57:30<4:22:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  79%|▊| 9792/12318 [16:57:30<4:22:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 11208/154911 [00:22<03:37, 660.70 examples/s]"
      ]
     },
     {
@@ -114188,8 +11887,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9793/12318 [16:57:37<4:22:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9793/12318 [16:57:37<4:22:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 11275/154911 [00:23<03:51, 620.05 examples/s]"
      ]
     },
     {
@@ -114197,8 +11895,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9794/12318 [16:57:39<4:22:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9794/12318 [16:57:39<4:22:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 11338/154911 [00:23<03:58, 600.86 examples/s]"
      ]
     },
     {
@@ -114206,8 +11903,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9795/12318 [16:57:44<4:22:08,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9795/12318 [16:57:44<4:22:08,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 11404/154911 [00:23<03:53, 613.82 examples/s]"
      ]
     },
     {
@@ -114215,8 +11911,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9796/12318 [16:57:49<4:22:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9796/12318 [16:57:49<4:22:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 11466/154911 [00:23<04:08, 578.11 examples/s]"
      ]
     },
     {
@@ -114224,8 +11919,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9797/12318 [16:57:52<4:21:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9797/12318 [16:57:52<4:21:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 11525/154911 [00:23<04:18, 554.87 examples/s]"
      ]
     },
     {
@@ -114233,8 +11927,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9798/12318 [16:57:55<4:21:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9798/12318 [16:57:55<4:21:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   7%|▋        | 11583/154911 [00:23<04:28, 533.31 examples/s]"
      ]
     },
     {
@@ -114242,8 +11935,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9799/12318 [16:58:02<4:21:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9799/12318 [16:58:02<4:21:42,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 11653/154911 [00:23<04:14, 563.32 examples/s]"
      ]
     },
     {
@@ -114251,8 +11943,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9800/12318 [16:58:09<4:21:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9800/12318 [16:58:09<4:21:36,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 11721/154911 [00:23<04:00, 594.83 examples/s]"
      ]
     },
     {
@@ -114260,8 +11951,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9801/12318 [16:58:16<4:21:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9801/12318 [16:58:16<4:21:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 11782/154911 [00:23<03:59, 597.48 examples/s]"
      ]
     },
     {
@@ -114269,8 +11959,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9802/12318 [16:58:23<4:21:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9802/12318 [16:58:23<4:21:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 11855/154911 [00:24<03:52, 616.26 examples/s]"
      ]
     },
     {
@@ -114278,8 +11967,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9803/12318 [16:58:31<4:21:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9803/12318 [16:58:31<4:21:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 11923/154911 [00:24<03:49, 622.69 examples/s]"
      ]
     },
     {
@@ -114287,7 +11975,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9804/12318 [16:58:36<4:21:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 12019/154911 [00:24<03:25, 694.15 examples/s]"
      ]
     },
     {
@@ -114295,7 +11983,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9804/12318 [16:58:36<4:21:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 12104/154911 [00:24<03:14, 735.77 examples/s]"
      ]
     },
     {
@@ -114303,7 +11991,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9805/12318 [16:58:39<4:21:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 12179/154911 [00:24<03:22, 703.65 examples/s]"
      ]
     },
     {
@@ -114311,7 +11999,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9805/12318 [16:58:39<4:21:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 12250/154911 [00:24<03:28, 685.05 examples/s]"
      ]
     },
     {
@@ -114319,8 +12007,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9806/12318 [16:58:46<4:20:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9806/12318 [16:58:46<4:20:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 12334/154911 [00:24<03:16, 726.47 examples/s]"
      ]
     },
     {
@@ -114328,8 +12015,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9807/12318 [16:58:48<4:20:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9807/12318 [16:58:48<4:20:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 12415/154911 [00:24<03:16, 724.06 examples/s]"
      ]
     },
     {
@@ -114337,8 +12023,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9808/12318 [16:58:55<4:20:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9808/12318 [16:58:55<4:20:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋        | 12566/154911 [00:24<02:35, 913.45 examples/s]"
      ]
     },
     {
@@ -114346,8 +12031,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9809/12318 [16:59:00<4:20:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9809/12318 [16:59:00<4:20:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋       | 12719/154911 [00:25<02:12, 1075.53 examples/s]"
      ]
     },
     {
@@ -114355,8 +12039,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9810/12318 [16:59:07<4:20:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9810/12318 [16:59:07<4:20:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▋       | 12873/154911 [00:25<02:00, 1176.63 examples/s]"
      ]
     },
     {
@@ -114364,8 +12047,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9811/12318 [16:59:15<4:20:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9811/12318 [16:59:15<4:20:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▊        | 12992/154911 [00:25<02:23, 986.45 examples/s]"
      ]
     },
     {
@@ -114373,8 +12055,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9812/12318 [16:59:23<4:20:21,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9812/12318 [16:59:23<4:20:21,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   8%|▊        | 13096/154911 [00:25<02:37, 903.12 examples/s]"
      ]
     },
     {
@@ -114382,8 +12063,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9813/12318 [16:59:32<4:20:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9813/12318 [16:59:32<4:20:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13191/154911 [00:25<02:44, 859.34 examples/s]"
      ]
     },
     {
@@ -114391,8 +12071,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9814/12318 [16:59:37<4:20:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9814/12318 [16:59:37<4:20:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13280/154911 [00:25<02:59, 786.91 examples/s]"
      ]
     },
     {
@@ -114400,8 +12079,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9815/12318 [16:59:43<4:20:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9815/12318 [16:59:43<4:20:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13362/154911 [00:25<03:15, 723.01 examples/s]"
      ]
     },
     {
@@ -114409,8 +12087,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9816/12318 [16:59:51<4:19:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9816/12318 [16:59:51<4:19:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13444/154911 [00:26<03:22, 699.38 examples/s]"
      ]
     },
     {
@@ -114418,8 +12095,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9817/12318 [16:59:59<4:19:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9817/12318 [16:59:59<4:19:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13519/154911 [00:26<03:27, 681.54 examples/s]"
      ]
     },
     {
@@ -114427,8 +12103,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9818/12318 [17:00:04<4:19:44,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9818/12318 [17:00:04<4:19:44,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13590/154911 [00:26<03:25, 687.30 examples/s]"
      ]
     },
     {
@@ -114436,8 +12111,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9819/12318 [17:00:10<4:19:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9819/12318 [17:00:10<4:19:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13680/154911 [00:26<03:11, 737.61 examples/s]"
      ]
     },
     {
@@ -114445,8 +12119,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9820/12318 [17:00:11<4:19:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9820/12318 [17:00:11<4:19:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13762/154911 [00:26<03:05, 759.59 examples/s]"
      ]
     },
     {
@@ -114454,8 +12127,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9821/12318 [17:00:17<4:19:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9821/12318 [17:00:17<4:19:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13839/154911 [00:26<03:08, 750.35 examples/s]"
      ]
     },
     {
@@ -114463,8 +12135,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9822/12318 [17:00:23<4:19:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9822/12318 [17:00:23<4:19:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13915/154911 [00:26<03:11, 734.37 examples/s]"
      ]
     },
     {
@@ -114472,8 +12143,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9823/12318 [17:00:27<4:19:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9823/12318 [17:00:27<4:19:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 13989/154911 [00:26<03:11, 734.29 examples/s]"
      ]
     },
     {
@@ -114481,8 +12151,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9824/12318 [17:00:50<4:19:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9824/12318 [17:00:50<4:19:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 14090/154911 [00:26<02:53, 811.75 examples/s]"
      ]
     },
     {
@@ -114490,8 +12159,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9825/12318 [17:00:52<4:19:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9825/12318 [17:00:52<4:19:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 14185/154911 [00:26<02:46, 847.70 examples/s]"
      ]
     },
     {
@@ -114499,8 +12167,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9826/12318 [17:00:57<4:18:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9826/12318 [17:00:57<4:18:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 14273/154911 [00:27<02:50, 824.74 examples/s]"
      ]
     },
     {
@@ -114508,8 +12175,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9827/12318 [17:00:58<4:18:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9827/12318 [17:00:58<4:18:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 14357/154911 [00:27<02:53, 811.79 examples/s]"
      ]
     },
     {
@@ -114517,8 +12183,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9828/12318 [17:01:03<4:18:41,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9828/12318 [17:01:03<4:18:41,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 14442/154911 [00:27<02:50, 822.19 examples/s]"
      ]
     },
     {
@@ -114526,8 +12191,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9829/12318 [17:01:09<4:18:35,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9829/12318 [17:01:09<4:18:35,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 14527/154911 [00:27<03:43, 628.61 examples/s]"
      ]
     },
     {
@@ -114535,8 +12199,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9830/12318 [17:01:15<4:18:28,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9830/12318 [17:01:15<4:18:28,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):   9%|▊        | 14668/154911 [00:27<03:01, 772.70 examples/s]"
      ]
     },
     {
@@ -114544,8 +12207,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9831/12318 [17:01:20<4:18:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9831/12318 [17:01:20<4:18:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▊        | 14772/154911 [00:27<02:48, 829.61 examples/s]"
      ]
     },
     {
@@ -114553,8 +12215,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9832/12318 [17:01:25<4:18:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9832/12318 [17:01:25<4:18:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▊        | 14872/154911 [00:27<02:40, 871.39 examples/s]"
      ]
     },
     {
@@ -114562,8 +12223,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9833/12318 [17:01:32<4:18:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9833/12318 [17:01:32<4:18:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▊        | 14997/154911 [00:27<02:24, 969.75 examples/s]"
      ]
     },
     {
@@ -114571,8 +12231,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9834/12318 [17:01:33<4:18:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9834/12318 [17:01:33<4:18:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▊       | 15107/154911 [00:28<02:19, 1005.65 examples/s]"
      ]
     },
     {
@@ -114580,8 +12239,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9835/12318 [17:01:37<4:17:55,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9835/12318 [17:01:37<4:17:55,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 15216/154911 [00:28<02:35, 895.95 examples/s]"
      ]
     },
     {
@@ -114589,8 +12247,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9836/12318 [17:01:42<4:17:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9836/12318 [17:01:42<4:17:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 15317/154911 [00:28<02:41, 863.44 examples/s]"
      ]
     },
     {
@@ -114598,8 +12255,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9837/12318 [17:01:47<4:17:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9837/12318 [17:01:47<4:17:42,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 15407/154911 [00:28<02:50, 819.02 examples/s]"
      ]
     },
     {
@@ -114607,8 +12263,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9838/12318 [17:01:55<4:17:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9838/12318 [17:01:55<4:17:36,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 15493/154911 [00:28<02:49, 822.93 examples/s]"
      ]
     },
     {
@@ -114616,8 +12271,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9839/12318 [17:01:58<4:17:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9839/12318 [17:01:58<4:17:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 15577/154911 [00:28<02:58, 781.19 examples/s]"
      ]
     },
     {
@@ -114625,8 +12279,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9840/12318 [17:02:07<4:17:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9840/12318 [17:02:07<4:17:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 15667/154911 [00:28<02:51, 811.69 examples/s]"
      ]
     },
     {
@@ -114634,8 +12287,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9841/12318 [17:02:14<4:17:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9841/12318 [17:02:14<4:17:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 15762/154911 [00:28<02:44, 846.60 examples/s]"
      ]
     },
     {
@@ -114643,8 +12295,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9842/12318 [17:02:21<4:17:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9842/12318 [17:02:21<4:17:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 15849/154911 [00:28<02:58, 777.22 examples/s]"
      ]
     },
     {
@@ -114652,8 +12303,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9843/12318 [17:02:25<4:17:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9843/12318 [17:02:25<4:17:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 15929/154911 [00:29<03:08, 735.45 examples/s]"
      ]
     },
     {
@@ -114661,8 +12311,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9844/12318 [17:02:31<4:16:58,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9844/12318 [17:02:31<4:16:58,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 16014/154911 [00:29<03:03, 757.22 examples/s]"
      ]
     },
     {
@@ -114670,8 +12319,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9845/12318 [17:02:37<4:16:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9845/12318 [17:02:37<4:16:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 16091/154911 [00:29<03:13, 717.43 examples/s]"
      ]
     },
     {
@@ -114679,8 +12327,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9846/12318 [17:02:41<4:16:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9846/12318 [17:02:41<4:16:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 16166/154911 [00:29<03:33, 650.04 examples/s]"
      ]
     },
     {
@@ -114688,8 +12335,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9847/12318 [17:02:50<4:16:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9847/12318 [17:02:50<4:16:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  10%|▉        | 16233/154911 [00:29<03:31, 655.07 examples/s]"
      ]
     },
     {
@@ -114697,8 +12343,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9848/12318 [17:02:57<4:16:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9848/12318 [17:02:57<4:16:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16300/154911 [00:29<03:32, 651.66 examples/s]"
      ]
     },
     {
@@ -114706,8 +12351,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9849/12318 [17:03:03<4:16:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9849/12318 [17:03:03<4:16:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16366/154911 [00:29<03:43, 621.16 examples/s]"
      ]
     },
     {
@@ -114715,8 +12359,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9850/12318 [17:03:12<4:16:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9850/12318 [17:03:12<4:16:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16445/154911 [00:29<03:34, 645.92 examples/s]"
      ]
     },
     {
@@ -114724,8 +12367,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9851/12318 [17:03:16<4:16:15,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9851/12318 [17:03:16<4:16:15,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16531/154911 [00:30<03:16, 704.23 examples/s]"
      ]
     },
     {
@@ -114733,8 +12375,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9852/12318 [17:03:21<4:16:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9852/12318 [17:03:21<4:16:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16623/154911 [00:30<03:02, 757.98 examples/s]"
      ]
     },
     {
@@ -114742,8 +12383,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9853/12318 [17:03:30<4:16:03,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9853/12318 [17:03:30<4:16:03,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16700/154911 [00:30<03:09, 727.74 examples/s]"
      ]
     },
     {
@@ -114751,8 +12391,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9854/12318 [17:03:36<4:15:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9854/12318 [17:03:36<4:15:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16774/154911 [00:30<03:15, 707.23 examples/s]"
      ]
     },
     {
@@ -114760,8 +12399,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9855/12318 [17:03:38<4:15:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9855/12318 [17:03:38<4:15:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16846/154911 [00:30<03:17, 699.38 examples/s]"
      ]
     },
     {
@@ -114769,8 +12407,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9856/12318 [17:04:14<4:15:51,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9856/12318 [17:04:14<4:15:51,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16917/154911 [00:30<03:29, 659.26 examples/s]"
      ]
     },
     {
@@ -114778,8 +12415,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9857/12318 [17:04:22<4:15:45,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9857/12318 [17:04:22<4:15:45,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 16992/154911 [00:30<03:21, 684.08 examples/s]"
      ]
     },
     {
@@ -114787,8 +12423,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9858/12318 [17:04:23<4:15:37,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9858/12318 [17:04:23<4:15:37,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 17080/154911 [00:30<03:07, 735.76 examples/s]"
      ]
     },
     {
@@ -114796,8 +12431,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9859/12318 [17:04:28<4:15:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9859/12318 [17:04:28<4:15:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|▉        | 17158/154911 [00:30<03:11, 719.08 examples/s]"
      ]
     },
     {
@@ -114805,8 +12439,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9860/12318 [17:04:31<4:15:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9860/12318 [17:04:31<4:15:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|█        | 17261/154911 [00:30<02:52, 796.23 examples/s]"
      ]
     },
     {
@@ -114814,8 +12447,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9861/12318 [17:04:38<4:15:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9861/12318 [17:04:38<4:15:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|█        | 17342/154911 [00:31<03:00, 763.43 examples/s]"
      ]
     },
     {
@@ -114823,8 +12455,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9862/12318 [17:04:44<4:15:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9862/12318 [17:04:44<4:15:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|█        | 17419/154911 [00:31<03:15, 703.86 examples/s]"
      ]
     },
     {
@@ -114832,8 +12463,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9863/12318 [17:04:51<4:15:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9863/12318 [17:04:51<4:15:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|█        | 17491/154911 [00:31<03:27, 663.72 examples/s]"
      ]
     },
     {
@@ -114841,8 +12471,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9864/12318 [17:04:59<4:15:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9864/12318 [17:04:59<4:15:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|█        | 17559/154911 [00:31<03:34, 640.56 examples/s]"
      ]
     },
     {
@@ -114850,8 +12479,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9865/12318 [17:05:08<4:14:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9865/12318 [17:05:08<4:14:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|█        | 17633/154911 [00:31<03:31, 649.96 examples/s]"
      ]
     },
     {
@@ -114859,8 +12487,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9866/12318 [17:05:16<4:14:48,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9866/12318 [17:05:16<4:14:48,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|█        | 17708/154911 [00:31<03:23, 673.50 examples/s]"
      ]
     },
     {
@@ -114868,8 +12495,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9867/12318 [17:05:21<4:14:42,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9867/12318 [17:05:21<4:14:42,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  11%|█        | 17776/154911 [00:31<03:31, 647.65 examples/s]"
      ]
     },
     {
@@ -114877,8 +12503,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9868/12318 [17:05:28<4:14:36,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9868/12318 [17:05:28<4:14:36,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 17863/154911 [00:31<03:13, 707.99 examples/s]"
      ]
     },
     {
@@ -114886,8 +12511,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9869/12318 [17:05:31<4:14:29,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9869/12318 [17:05:31<4:14:29,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 17937/154911 [00:32<03:11, 713.96 examples/s]"
      ]
     },
     {
@@ -114895,8 +12519,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9870/12318 [17:05:36<4:14:22,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9870/12318 [17:05:36<4:14:22,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18010/154911 [00:32<03:16, 695.23 examples/s]"
      ]
     },
     {
@@ -114904,8 +12527,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9871/12318 [17:05:43<4:14:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9871/12318 [17:05:43<4:14:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18091/154911 [00:32<03:08, 724.93 examples/s]"
      ]
     },
     {
@@ -114913,8 +12535,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9872/12318 [17:05:48<4:14:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9872/12318 [17:05:48<4:14:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18167/154911 [00:32<03:32, 644.44 examples/s]"
      ]
     },
     {
@@ -114922,8 +12543,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9873/12318 [17:05:51<4:14:02,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9873/12318 [17:05:51<4:14:02,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18264/154911 [00:32<03:08, 725.86 examples/s]"
      ]
     },
     {
@@ -114931,8 +12551,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9874/12318 [17:05:55<4:13:56,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9874/12318 [17:05:55<4:13:56,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18340/154911 [00:32<03:11, 712.47 examples/s]"
      ]
     },
     {
@@ -114940,8 +12559,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9875/12318 [17:06:03<4:13:50,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9875/12318 [17:06:03<4:13:50,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18449/154911 [00:32<02:47, 812.81 examples/s]"
      ]
     },
     {
@@ -114949,8 +12567,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9876/12318 [17:06:08<4:13:43,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9876/12318 [17:06:08<4:13:43,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18537/154911 [00:32<02:44, 831.43 examples/s]"
      ]
     },
     {
@@ -114958,8 +12575,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9877/12318 [17:06:16<4:13:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9877/12318 [17:06:16<4:13:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18624/154911 [00:32<02:46, 818.14 examples/s]"
      ]
     },
     {
@@ -114967,8 +12583,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9878/12318 [17:06:23<4:13:31,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9878/12318 [17:06:23<4:13:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18708/154911 [00:33<02:50, 798.84 examples/s]"
      ]
     },
     {
@@ -114976,8 +12591,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9879/12318 [17:06:28<4:13:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9879/12318 [17:06:28<4:13:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18789/154911 [00:33<03:02, 746.24 examples/s]"
      ]
     },
     {
@@ -114985,8 +12599,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9880/12318 [17:06:31<4:13:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9880/12318 [17:06:31<4:13:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18870/154911 [00:33<03:03, 741.98 examples/s]"
      ]
     },
     {
@@ -114994,8 +12607,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9881/12318 [17:06:39<4:13:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9881/12318 [17:06:39<4:13:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 18946/154911 [00:33<03:20, 677.27 examples/s]"
      ]
     },
     {
@@ -115003,8 +12615,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9882/12318 [17:06:45<4:13:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9882/12318 [17:06:45<4:13:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 19022/154911 [00:33<03:14, 698.65 examples/s]"
      ]
     },
     {
@@ -115012,8 +12623,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9883/12318 [17:06:54<4:13:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9883/12318 [17:06:54<4:13:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 19094/154911 [00:33<03:23, 666.71 examples/s]"
      ]
     },
     {
@@ -115021,8 +12631,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9884/12318 [17:06:57<4:12:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9884/12318 [17:06:57<4:12:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 19163/154911 [00:33<03:33, 636.80 examples/s]"
      ]
     },
     {
@@ -115030,8 +12639,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9885/12318 [17:07:00<4:12:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9885/12318 [17:07:00<4:12:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 19235/154911 [00:33<03:30, 643.57 examples/s]"
      ]
     },
     {
@@ -115039,8 +12647,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9886/12318 [17:07:05<4:12:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9886/12318 [17:07:05<4:12:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  12%|█        | 19304/154911 [00:33<03:28, 649.32 examples/s]"
      ]
     },
     {
@@ -115048,8 +12655,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9887/12318 [17:07:11<4:12:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9887/12318 [17:07:11<4:12:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 19372/154911 [00:34<03:26, 656.08 examples/s]"
      ]
     },
     {
@@ -115057,8 +12663,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9888/12318 [17:07:36<4:12:32,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9888/12318 [17:07:36<4:12:32,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 19438/154911 [00:34<03:30, 644.16 examples/s]"
      ]
     },
     {
@@ -115066,8 +12671,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9889/12318 [17:07:42<4:12:25,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9889/12318 [17:07:42<4:12:25,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 19508/154911 [00:34<03:25, 659.91 examples/s]"
      ]
     },
     {
@@ -115075,8 +12679,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9890/12318 [17:07:43<4:12:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9890/12318 [17:07:43<4:12:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 19589/154911 [00:34<03:12, 702.69 examples/s]"
      ]
     },
     {
@@ -115084,8 +12687,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9891/12318 [17:07:51<4:12:12,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9891/12318 [17:07:51<4:12:12,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 19660/154911 [00:34<03:15, 692.49 examples/s]"
      ]
     },
     {
@@ -115093,8 +12695,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9892/12318 [17:07:55<4:12:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9892/12318 [17:07:55<4:12:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 19740/154911 [00:34<03:07, 720.43 examples/s]"
      ]
     },
     {
@@ -115102,8 +12703,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9893/12318 [17:07:59<4:11:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9893/12318 [17:07:59<4:11:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 19814/154911 [00:34<03:11, 705.30 examples/s]"
      ]
     },
     {
@@ -115111,8 +12711,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9894/12318 [17:08:01<4:11:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9894/12318 [17:08:01<4:11:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 19885/154911 [00:34<03:20, 672.29 examples/s]"
      ]
     },
     {
@@ -115120,8 +12719,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9895/12318 [17:08:09<4:11:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9895/12318 [17:08:09<4:11:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 19955/154911 [00:34<03:18, 678.36 examples/s]"
      ]
     },
     {
@@ -115129,8 +12727,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9896/12318 [17:08:13<4:11:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9896/12318 [17:08:13<4:11:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 20029/154911 [00:34<03:13, 695.34 examples/s]"
      ]
     },
     {
@@ -115138,8 +12735,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9897/12318 [17:08:16<4:11:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9897/12318 [17:08:16<4:11:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 20102/154911 [00:35<03:12, 701.83 examples/s]"
      ]
     },
     {
@@ -115147,8 +12743,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9898/12318 [17:08:20<4:11:25,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9898/12318 [17:08:20<4:11:25,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 20173/154911 [00:35<03:22, 664.81 examples/s]"
      ]
     },
     {
@@ -115156,8 +12751,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9899/12318 [17:08:25<4:11:18,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9899/12318 [17:08:25<4:11:18,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 20249/154911 [00:35<03:15, 689.98 examples/s]"
      ]
     },
     {
@@ -115165,8 +12759,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9900/12318 [17:08:33<4:11:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9900/12318 [17:08:33<4:11:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 20360/154911 [00:35<02:46, 808.88 examples/s]"
      ]
     },
     {
@@ -115174,7 +12767,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9901/12318 [17:08:36<4:11:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 20467/154911 [00:35<02:32, 883.00 examples/s]"
      ]
     },
     {
@@ -115182,7 +12775,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9901/12318 [17:08:36<4:11:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 20562/154911 [00:35<02:31, 887.48 examples/s]"
      ]
     },
     {
@@ -115190,8 +12783,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9902/12318 [17:08:42<4:10:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9902/12318 [17:08:42<4:10:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█       | 20693/154911 [00:35<02:13, 1005.35 examples/s]"
      ]
     },
     {
@@ -115199,8 +12791,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9903/12318 [17:08:47<4:10:53,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9903/12318 [17:08:47<4:10:53,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 20795/154911 [00:35<02:43, 822.58 examples/s]"
      ]
     },
     {
@@ -115208,7 +12799,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9904/12318 [17:08:55<4:10:47,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  13%|█▏       | 20885/154911 [00:36<02:47, 801.43 examples/s]"
      ]
     },
     {
@@ -115216,7 +12807,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9904/12318 [17:08:55<4:10:47,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█▏       | 20976/154911 [00:36<02:42, 823.32 examples/s]"
      ]
     },
     {
@@ -115224,8 +12815,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9905/12318 [17:08:57<4:10:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9905/12318 [17:08:57<4:10:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█       | 21152/154911 [00:36<02:06, 1056.56 examples/s]"
      ]
     },
     {
@@ -115233,8 +12823,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9906/12318 [17:09:00<4:10:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9906/12318 [17:09:00<4:10:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█       | 21300/154911 [00:36<01:59, 1118.88 examples/s]"
      ]
     },
     {
@@ -115242,8 +12831,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9907/12318 [17:09:04<4:10:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9907/12318 [17:09:04<4:10:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█       | 21425/154911 [00:36<01:55, 1152.48 examples/s]"
      ]
     },
     {
@@ -115251,8 +12839,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9908/12318 [17:09:07<4:10:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9908/12318 [17:09:07<4:10:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█       | 21543/154911 [00:36<02:06, 1056.46 examples/s]"
      ]
     },
     {
@@ -115260,8 +12847,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9909/12318 [17:09:11<4:10:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9909/12318 [17:09:11<4:10:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█       | 21659/154911 [00:36<02:03, 1082.36 examples/s]"
      ]
     },
     {
@@ -115269,8 +12855,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9910/12318 [17:09:12<4:10:05,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9910/12318 [17:09:12<4:10:05,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█▏      | 21815/154911 [00:36<01:49, 1212.79 examples/s]"
      ]
     },
     {
@@ -115278,8 +12863,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9911/12318 [17:09:20<4:09:59,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9911/12318 [17:09:20<4:09:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█▏      | 21943/154911 [00:36<01:54, 1161.58 examples/s]"
      ]
     },
     {
@@ -115287,8 +12871,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9912/12318 [17:09:26<4:09:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9912/12318 [17:09:26<4:09:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█▏      | 22080/154911 [00:37<01:49, 1217.23 examples/s]"
      ]
     },
     {
@@ -115296,8 +12879,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9913/12318 [17:09:30<4:09:46,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9913/12318 [17:09:31<4:09:46,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█▏      | 22204/154911 [00:37<02:11, 1012.54 examples/s]"
      ]
     },
     {
@@ -115305,8 +12887,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9914/12318 [17:09:34<4:09:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9914/12318 [17:09:34<4:09:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█▎       | 22313/154911 [00:37<02:20, 946.17 examples/s]"
      ]
     },
     {
@@ -115314,8 +12895,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  80%|▊| 9915/12318 [17:09:41<4:09:33,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  80%|▊| 9915/12318 [17:09:41<4:09:33,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  14%|█▎       | 22414/154911 [00:37<02:37, 839.57 examples/s]"
      ]
     },
     {
@@ -115323,8 +12903,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9916/12318 [17:09:47<4:09:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9916/12318 [17:09:47<4:09:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 22503/154911 [00:37<02:38, 833.65 examples/s]"
      ]
     },
     {
@@ -115332,8 +12911,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9917/12318 [17:09:49<4:09:19,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9917/12318 [17:09:49<4:09:19,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 22590/154911 [00:37<02:50, 776.49 examples/s]"
      ]
     },
     {
@@ -115341,8 +12919,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9918/12318 [17:09:53<4:09:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9918/12318 [17:09:53<4:09:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 22675/154911 [00:37<02:55, 751.39 examples/s]"
      ]
     },
     {
@@ -115350,8 +12927,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9919/12318 [17:10:00<4:09:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9919/12318 [17:10:00<4:09:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 22752/154911 [00:37<02:57, 744.83 examples/s]"
      ]
     },
     {
@@ -115359,8 +12935,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9920/12318 [17:10:58<4:09:13,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9920/12318 [17:10:58<4:09:13,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 22828/154911 [00:38<03:27, 637.22 examples/s]"
      ]
     },
     {
@@ -115368,8 +12943,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9921/12318 [17:11:03<4:09:06,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9921/12318 [17:11:03<4:09:06,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 22895/154911 [00:38<04:01, 547.22 examples/s]"
      ]
     },
     {
@@ -115377,8 +12951,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9922/12318 [17:11:07<4:08:59,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9922/12318 [17:11:07<4:08:59,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 22957/154911 [00:38<03:54, 563.46 examples/s]"
      ]
     },
     {
@@ -115386,8 +12959,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9923/12318 [17:11:09<4:08:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9923/12318 [17:11:09<4:08:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23017/154911 [00:38<03:51, 569.27 examples/s]"
      ]
     },
     {
@@ -115395,8 +12967,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9924/12318 [17:11:18<4:08:47,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9924/12318 [17:11:18<4:08:47,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23078/154911 [00:38<03:48, 577.44 examples/s]"
      ]
     },
     {
@@ -115404,8 +12975,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9925/12318 [17:11:21<4:08:40,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9925/12318 [17:11:21<4:08:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23138/154911 [00:38<03:59, 550.57 examples/s]"
      ]
     },
     {
@@ -115413,8 +12983,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9926/12318 [17:11:29<4:08:34,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9926/12318 [17:11:29<4:08:34,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23195/154911 [00:38<04:11, 523.44 examples/s]"
      ]
     },
     {
@@ -115422,8 +12991,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9927/12318 [17:11:31<4:08:27,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9927/12318 [17:11:31<4:08:27,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23249/154911 [00:38<04:19, 506.61 examples/s]"
      ]
     },
     {
@@ -115431,8 +12999,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9928/12318 [17:11:36<4:08:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9928/12318 [17:11:36<4:08:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23317/154911 [00:39<03:59, 549.71 examples/s]"
      ]
     },
     {
@@ -115440,8 +13007,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9929/12318 [17:11:40<4:08:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9929/12318 [17:11:40<4:08:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23373/154911 [00:39<03:58, 552.00 examples/s]"
      ]
     },
     {
@@ -115449,8 +13015,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9930/12318 [17:11:43<4:08:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9930/12318 [17:11:43<4:08:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23429/154911 [00:39<04:21, 502.13 examples/s]"
      ]
     },
     {
@@ -115458,8 +13023,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9931/12318 [17:11:50<4:08:00,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9931/12318 [17:11:50<4:08:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23525/154911 [00:39<03:31, 620.71 examples/s]"
      ]
     },
     {
@@ -115467,8 +13031,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9932/12318 [17:11:56<4:07:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9932/12318 [17:11:56<4:07:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23590/154911 [00:39<03:31, 620.59 examples/s]"
      ]
     },
     {
@@ -115476,8 +13039,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9933/12318 [17:12:02<4:07:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9933/12318 [17:12:02<4:07:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▎       | 23654/154911 [00:39<03:35, 608.42 examples/s]"
      ]
     },
     {
@@ -115485,7 +13047,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9934/12318 [17:12:03<4:07:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▍       | 23717/154911 [00:39<03:36, 605.23 examples/s]"
      ]
     },
     {
@@ -115493,7 +13055,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9934/12318 [17:12:03<4:07:40,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▍       | 23783/154911 [00:39<03:33, 613.08 examples/s]"
      ]
     },
     {
@@ -115501,8 +13063,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9935/12318 [17:12:08<4:07:34,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9935/12318 [17:12:08<4:07:34,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▍       | 23846/154911 [00:39<03:34, 611.60 examples/s]"
      ]
     },
     {
@@ -115510,8 +13071,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9936/12318 [17:12:09<4:07:26,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9936/12318 [17:12:09<4:07:26,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▍       | 23908/154911 [00:40<03:36, 605.48 examples/s]"
      ]
     },
     {
@@ -115519,8 +13079,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9937/12318 [17:12:14<4:07:20,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9937/12318 [17:12:14<4:07:20,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  15%|█▍       | 23973/154911 [00:40<03:33, 612.11 examples/s]"
      ]
     },
     {
@@ -115528,8 +13087,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9938/12318 [17:12:17<4:07:13,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9938/12318 [17:12:17<4:07:13,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24069/154911 [00:40<03:03, 711.84 examples/s]"
      ]
     },
     {
@@ -115537,8 +13095,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9939/12318 [17:12:20<4:07:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9939/12318 [17:12:20<4:07:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24146/154911 [00:40<02:59, 727.90 examples/s]"
      ]
     },
     {
@@ -115546,7 +13103,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9940/12318 [17:12:28<4:07:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24225/154911 [00:40<02:59, 727.56 examples/s]"
      ]
     },
     {
@@ -115554,7 +13111,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9940/12318 [17:12:28<4:07:00,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24299/154911 [00:40<03:20, 650.62 examples/s]"
      ]
     },
     {
@@ -115562,8 +13119,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9941/12318 [17:12:35<4:06:54,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9941/12318 [17:12:35<4:06:54,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24366/154911 [00:40<03:21, 646.45 examples/s]"
      ]
     },
     {
@@ -115571,8 +13127,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9942/12318 [17:12:43<4:06:48,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9942/12318 [17:12:43<4:06:48,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24432/154911 [00:40<03:20, 649.38 examples/s]"
      ]
     },
     {
@@ -115580,8 +13135,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9943/12318 [17:12:52<4:06:42,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9943/12318 [17:12:52<4:06:42,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24498/154911 [00:40<03:22, 644.93 examples/s]"
      ]
     },
     {
@@ -115589,8 +13143,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9944/12318 [17:12:57<4:06:36,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9944/12318 [17:12:57<4:06:36,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24564/154911 [00:41<03:39, 594.64 examples/s]"
      ]
     },
     {
@@ -115598,8 +13151,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9945/12318 [17:13:04<4:06:30,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9945/12318 [17:13:04<4:06:30,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24627/154911 [00:41<03:35, 603.86 examples/s]"
      ]
     },
     {
@@ -115607,8 +13159,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9946/12318 [17:13:08<4:06:23,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9946/12318 [17:13:08<4:06:23,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24690/154911 [00:41<03:33, 609.43 examples/s]"
      ]
     },
     {
@@ -115616,8 +13167,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9947/12318 [17:13:11<4:06:16,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9947/12318 [17:13:11<4:06:16,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24754/154911 [00:41<03:43, 582.03 examples/s]"
      ]
     },
     {
@@ -115625,8 +13175,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9948/12318 [17:13:16<4:06:09,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9948/12318 [17:13:16<4:06:09,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24813/154911 [00:41<03:43, 581.73 examples/s]"
      ]
     },
     {
@@ -115634,8 +13183,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9949/12318 [17:13:24<4:06:04,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9949/12318 [17:13:24<4:06:04,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24888/154911 [00:41<03:39, 593.02 examples/s]"
      ]
     },
     {
@@ -115643,8 +13191,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9950/12318 [17:13:29<4:05:57,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9950/12318 [17:13:29<4:05:57,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 24958/154911 [00:41<03:29, 619.52 examples/s]"
      ]
     },
     {
@@ -115652,8 +13199,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9951/12318 [17:13:36<4:05:51,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9951/12318 [17:13:36<4:05:51,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 25039/154911 [00:41<03:13, 670.53 examples/s]"
      ]
     },
     {
@@ -115661,8 +13207,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9952/12318 [17:14:28<4:05:56,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9952/12318 [17:14:28<4:05:56,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 25110/154911 [00:41<03:15, 664.63 examples/s]"
      ]
     },
     {
@@ -115670,8 +13215,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9953/12318 [17:14:32<4:05:49,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9953/12318 [17:14:32<4:05:49,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 25177/154911 [00:41<03:20, 646.91 examples/s]"
      ]
     },
     {
@@ -115679,8 +13223,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9954/12318 [17:14:38<4:05:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9954/12318 [17:14:38<4:05:43,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 25249/154911 [00:42<03:15, 664.76 examples/s]"
      ]
     },
     {
@@ -115688,8 +13231,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9955/12318 [17:14:39<4:05:35,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9955/12318 [17:14:39<4:05:35,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 25326/154911 [00:42<03:06, 694.92 examples/s]"
      ]
     },
     {
@@ -115697,8 +13239,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9956/12318 [17:14:46<4:05:29,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9956/12318 [17:14:46<4:05:29,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 25398/154911 [00:42<03:17, 656.81 examples/s]"
      ]
     },
     {
@@ -115706,8 +13247,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9957/12318 [17:14:49<4:05:22,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9957/12318 [17:14:49<4:05:22,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 25470/154911 [00:42<03:12, 672.08 examples/s]"
      ]
     },
     {
@@ -115715,7 +13255,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9958/12318 [17:14:58<4:05:16,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  16%|█▍       | 25538/154911 [00:42<03:25, 629.62 examples/s]"
      ]
     },
     {
@@ -115723,7 +13263,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9958/12318 [17:14:58<4:05:16,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▍       | 25602/154911 [00:42<03:42, 580.04 examples/s]"
      ]
     },
     {
@@ -115731,8 +13271,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9959/12318 [17:14:59<4:05:09,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9959/12318 [17:14:59<4:05:09,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▍       | 25668/154911 [00:42<03:35, 598.74 examples/s]"
      ]
     },
     {
@@ -115740,8 +13279,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9960/12318 [17:15:06<4:05:03,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9960/12318 [17:15:06<4:05:03,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▍       | 25729/154911 [00:42<03:35, 599.98 examples/s]"
      ]
     },
     {
@@ -115749,8 +13287,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9961/12318 [17:15:13<4:04:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9961/12318 [17:15:13<4:04:57,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▍       | 25791/154911 [00:42<03:41, 582.50 examples/s]"
      ]
     },
     {
@@ -115758,8 +13295,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9962/12318 [17:15:18<4:04:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9962/12318 [17:15:18<4:04:50,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 25850/154911 [00:43<03:50, 559.46 examples/s]"
      ]
     },
     {
@@ -115767,8 +13303,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9963/12318 [17:15:23<4:04:44,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9963/12318 [17:15:23<4:04:44,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 25907/154911 [00:43<03:52, 554.95 examples/s]"
      ]
     },
     {
@@ -115776,8 +13311,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9964/12318 [17:15:30<4:04:38,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9964/12318 [17:15:30<4:04:38,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 25963/154911 [00:43<03:52, 554.42 examples/s]"
      ]
     },
     {
@@ -115785,8 +13319,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9965/12318 [17:15:35<4:04:31,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9965/12318 [17:15:35<4:04:31,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26025/154911 [00:43<03:47, 565.96 examples/s]"
      ]
     },
     {
@@ -115794,8 +13327,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9966/12318 [17:15:38<4:04:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9966/12318 [17:15:38<4:04:24,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26082/154911 [00:43<03:58, 540.30 examples/s]"
      ]
     },
     {
@@ -115803,8 +13335,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9967/12318 [17:15:47<4:04:19,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9967/12318 [17:15:47<4:04:19,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26138/154911 [00:43<03:58, 540.83 examples/s]"
      ]
     },
     {
@@ -115812,8 +13343,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9968/12318 [17:15:49<4:04:11,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9968/12318 [17:15:49<4:04:11,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26193/154911 [00:43<04:31, 473.52 examples/s]"
      ]
     },
     {
@@ -115821,8 +13351,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9969/12318 [17:15:57<4:04:06,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9969/12318 [17:15:57<4:04:06,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26255/154911 [00:43<04:11, 511.52 examples/s]"
      ]
     },
     {
@@ -115830,8 +13359,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9970/12318 [17:16:04<4:04:00,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9970/12318 [17:16:04<4:04:00,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26315/154911 [00:43<04:00, 535.54 examples/s]"
      ]
     },
     {
@@ -115839,8 +13367,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9971/12318 [17:16:06<4:03:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9971/12318 [17:16:06<4:03:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26377/154911 [00:44<03:50, 558.25 examples/s]"
      ]
     },
     {
@@ -115848,8 +13375,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9972/12318 [17:16:08<4:03:45,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9972/12318 [17:16:08<4:03:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26447/154911 [00:44<03:35, 597.01 examples/s]"
      ]
     },
     {
@@ -115857,8 +13383,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9973/12318 [17:16:12<4:03:38,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9973/12318 [17:16:12<4:03:38,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26513/154911 [00:44<03:28, 614.49 examples/s]"
      ]
     },
     {
@@ -115866,7 +13391,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9974/12318 [17:16:13<4:03:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26577/154911 [00:44<03:48, 560.41 examples/s]"
      ]
     },
     {
@@ -115874,7 +13399,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9974/12318 [17:16:13<4:03:31,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26680/154911 [00:44<03:16, 652.34 examples/s]"
      ]
     },
     {
@@ -115882,8 +13407,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9975/12318 [17:16:15<4:03:24,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9975/12318 [17:16:15<4:03:24,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26746/154911 [00:44<03:16, 652.85 examples/s]"
      ]
     },
     {
@@ -115891,8 +13415,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9976/12318 [17:16:20<4:03:17,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9976/12318 [17:16:20<4:03:17,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26812/154911 [00:44<03:26, 618.96 examples/s]"
      ]
     },
     {
@@ -115900,8 +13423,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9977/12318 [17:16:29<4:03:12,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9977/12318 [17:16:29<4:03:12,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26878/154911 [00:44<03:28, 613.70 examples/s]"
      ]
     },
     {
@@ -115909,8 +13431,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9978/12318 [17:16:37<4:03:06,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9978/12318 [17:16:37<4:03:06,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 26940/154911 [00:45<04:20, 491.92 examples/s]"
      ]
     },
     {
@@ -115918,7 +13439,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9979/12318 [17:16:39<4:02:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 27011/154911 [00:45<03:55, 544.02 examples/s]"
      ]
     },
     {
@@ -115926,7 +13447,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9979/12318 [17:16:39<4:02:59,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  17%|█▌       | 27070/154911 [00:45<04:07, 515.49 examples/s]"
      ]
     },
     {
@@ -115934,8 +13455,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9980/12318 [17:16:43<4:02:52,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9980/12318 [17:16:43<4:02:52,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27125/154911 [00:45<04:10, 510.29 examples/s]"
      ]
     },
     {
@@ -115943,7 +13463,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9981/12318 [17:16:49<4:02:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27178/154911 [00:45<04:10, 510.80 examples/s]"
      ]
     },
     {
@@ -115951,7 +13471,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9981/12318 [17:16:49<4:02:45,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27231/154911 [00:45<04:18, 494.32 examples/s]"
      ]
     },
     {
@@ -115959,8 +13479,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9982/12318 [17:16:54<4:02:39,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9982/12318 [17:16:54<4:02:39,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27295/154911 [00:45<04:01, 527.92 examples/s]"
      ]
     },
     {
@@ -115968,8 +13487,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9983/12318 [17:16:58<4:02:32,  6.23s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9983/12318 [17:16:58<4:02:32,  6.23s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27361/154911 [00:45<03:55, 541.03 examples/s]"
      ]
     },
     {
@@ -115977,8 +13495,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9984/12318 [17:17:44<4:02:35,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9984/12318 [17:17:44<4:02:35,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27416/154911 [00:45<04:01, 528.23 examples/s]"
      ]
     },
     {
@@ -115986,8 +13503,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9985/12318 [17:17:52<4:02:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9985/12318 [17:17:52<4:02:30,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27479/154911 [00:46<03:49, 555.96 examples/s]"
      ]
     },
     {
@@ -115995,8 +13511,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9986/12318 [17:18:00<4:02:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9986/12318 [17:18:00<4:02:24,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27548/154911 [00:46<03:34, 593.65 examples/s]"
      ]
     },
     {
@@ -116004,8 +13519,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9987/12318 [17:18:03<4:02:17,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9987/12318 [17:18:03<4:02:17,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27617/154911 [00:46<03:25, 618.50 examples/s]"
      ]
     },
     {
@@ -116013,8 +13527,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9988/12318 [17:18:08<4:02:10,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9988/12318 [17:18:08<4:02:10,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27682/154911 [00:46<03:24, 622.38 examples/s]"
      ]
     },
     {
@@ -116022,8 +13535,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9989/12318 [17:18:15<4:02:04,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9989/12318 [17:18:15<4:02:04,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27746/154911 [00:46<03:23, 624.35 examples/s]"
      ]
     },
     {
@@ -116031,8 +13543,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9990/12318 [17:18:18<4:01:57,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9990/12318 [17:18:18<4:01:57,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▌       | 27809/154911 [00:46<03:41, 573.64 examples/s]"
      ]
     },
     {
@@ -116040,8 +13551,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9991/12318 [17:18:22<4:01:50,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9991/12318 [17:18:22<4:01:50,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▋       | 27993/154911 [00:46<02:17, 920.35 examples/s]"
      ]
     },
     {
@@ -116049,8 +13559,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9992/12318 [17:18:24<4:01:43,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9992/12318 [17:18:24<4:01:43,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▋       | 28089/154911 [00:46<02:25, 870.37 examples/s]"
      ]
     },
     {
@@ -116058,8 +13567,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9993/12318 [17:18:31<4:01:37,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9993/12318 [17:18:31<4:01:37,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▋       | 28179/154911 [00:46<02:31, 834.19 examples/s]"
      ]
     },
     {
@@ -116067,8 +13575,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9994/12318 [17:18:35<4:01:30,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9994/12318 [17:18:35<4:01:30,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▋       | 28285/154911 [00:47<02:21, 892.07 examples/s]"
      ]
     },
     {
@@ -116076,8 +13583,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9995/12318 [17:18:40<4:01:24,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9995/12318 [17:18:40<4:01:24,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▋       | 28377/154911 [00:47<02:31, 832.93 examples/s]"
      ]
     },
     {
@@ -116085,8 +13591,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9996/12318 [17:18:48<4:01:18,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9996/12318 [17:18:48<4:01:18,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▋       | 28469/154911 [00:47<02:27, 854.52 examples/s]"
      ]
     },
     {
@@ -116094,8 +13599,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9997/12318 [17:18:54<4:01:12,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9997/12318 [17:18:54<4:01:12,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  18%|█▋       | 28557/154911 [00:47<02:34, 818.15 examples/s]"
      ]
     },
     {
@@ -116103,8 +13607,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9998/12318 [17:19:03<4:01:06,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9998/12318 [17:19:03<4:01:06,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  19%|█▋       | 28681/154911 [00:47<02:15, 929.80 examples/s]"
      ]
     },
     {
@@ -116112,8 +13615,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 9999/12318 [17:19:11<4:01:00,  6.24s/it, v_num=e4xv, train/loss\r",
-      "Epoch 0:  81%|▊| 9999/12318 [17:19:11<4:01:00,  6.24s/it, v_num=e4xv, train/loss"
+      "Map (num_proc=64):  19%|█▋       | 28776/154911 [00:47<02:40, 785.33 examples/s]"
      ]
     },
     {
@@ -116121,8 +13623,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10000/12318 [17:19:16<4:00:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10000/12318 [17:19:16<4:00:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 28860/154911 [00:47<03:07, 672.61 examples/s]"
      ]
     },
     {
@@ -116130,8 +13631,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10001/12318 [17:19:21<4:00:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10001/12318 [17:19:21<4:00:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 28933/154911 [00:48<04:10, 502.50 examples/s]"
      ]
     },
     {
@@ -116139,8 +13639,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10002/12318 [17:19:27<4:00:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10002/12318 [17:19:27<4:00:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 28995/154911 [00:48<04:01, 520.92 examples/s]"
      ]
     },
     {
@@ -116148,8 +13647,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10003/12318 [17:19:31<4:00:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10003/12318 [17:19:31<4:00:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29056/154911 [00:48<04:28, 469.05 examples/s]"
      ]
     },
     {
@@ -116157,7 +13655,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10004/12318 [17:19:36<4:00:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29109/154911 [00:48<04:32, 462.36 examples/s]"
      ]
     },
     {
@@ -116165,7 +13663,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10004/12318 [17:19:36<4:00:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29159/154911 [00:48<04:36, 454.77 examples/s]"
      ]
     },
     {
@@ -116173,8 +13671,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10005/12318 [17:19:41<4:00:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10005/12318 [17:19:41<4:00:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█��       | 29214/154911 [00:48<04:24, 475.62 examples/s]"
      ]
     },
     {
@@ -116182,8 +13679,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10006/12318 [17:19:43<4:00:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10006/12318 [17:19:43<4:00:14,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29264/154911 [00:48<04:26, 471.44 examples/s]"
      ]
     },
     {
@@ -116191,8 +13687,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10007/12318 [17:19:47<4:00:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10007/12318 [17:19:47<4:00:07,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29320/154911 [00:48<04:17, 487.65 examples/s]"
      ]
     },
     {
@@ -116200,8 +13695,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10008/12318 [17:19:51<4:00:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10008/12318 [17:19:51<4:00:00,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29381/154911 [00:49<04:01, 519.77 examples/s]"
      ]
     },
     {
@@ -116209,8 +13703,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10009/12318 [17:19:53<3:59:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10009/12318 [17:19:53<3:59:53,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29447/154911 [00:49<03:49, 547.29 examples/s]"
      ]
     },
     {
@@ -116218,8 +13711,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10010/12318 [17:20:02<3:59:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10010/12318 [17:20:02<3:59:48,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29503/154911 [00:49<03:52, 540.19 examples/s]"
      ]
     },
     {
@@ -116227,8 +13719,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10011/12318 [17:20:07<3:59:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10011/12318 [17:20:07<3:59:41,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29574/154911 [00:49<03:33, 587.91 examples/s]"
      ]
     },
     {
@@ -116236,8 +13727,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10012/12318 [17:20:11<3:59:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10012/12318 [17:20:11<3:59:34,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29634/154911 [00:49<03:39, 570.04 examples/s]"
      ]
     },
     {
@@ -116245,8 +13735,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10013/12318 [17:20:14<3:59:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10013/12318 [17:20:14<3:59:27,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29692/154911 [00:49<03:58, 525.20 examples/s]"
      ]
     },
     {
@@ -116254,8 +13743,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10014/12318 [17:20:19<3:59:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10014/12318 [17:20:19<3:59:21,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29746/154911 [00:49<04:07, 506.59 examples/s]"
      ]
     },
     {
@@ -116263,8 +13751,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10015/12318 [17:20:27<3:59:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10015/12318 [17:20:27<3:59:15,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29812/154911 [00:49<03:52, 539.02 examples/s]"
      ]
     },
     {
@@ -116272,8 +13759,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10016/12318 [17:21:05<3:59:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10016/12318 [17:21:05<3:59:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29896/154911 [00:49<03:21, 621.77 examples/s]"
      ]
     },
     {
@@ -116281,8 +13767,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10017/12318 [17:21:07<3:59:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10017/12318 [17:21:07<3:59:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 29960/154911 [00:50<03:25, 609.41 examples/s]"
      ]
     },
     {
@@ -116290,8 +13775,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10018/12318 [17:21:12<3:59:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10018/12318 [17:21:12<3:59:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 30022/154911 [00:50<03:40, 565.91 examples/s]"
      ]
     },
     {
@@ -116299,8 +13783,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10019/12318 [17:21:18<3:58:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10019/12318 [17:21:18<3:58:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▋       | 30085/154911 [00:50<03:46, 550.59 examples/s]"
      ]
     },
     {
@@ -116308,8 +13791,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10020/12318 [17:21:26<3:58:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10020/12318 [17:21:26<3:58:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▊       | 30148/154911 [00:50<03:42, 559.95 examples/s]"
      ]
     },
     {
@@ -116317,8 +13799,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10021/12318 [17:21:35<3:58:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10021/12318 [17:21:35<3:58:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  19%|█▊       | 30205/154911 [00:50<03:48, 546.01 examples/s]"
      ]
     },
     {
@@ -116326,8 +13807,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10022/12318 [17:21:42<3:58:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10022/12318 [17:21:42<3:58:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 30270/154911 [00:50<03:37, 573.73 examples/s]"
      ]
     },
     {
@@ -116335,8 +13815,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10023/12318 [17:21:51<3:58:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10023/12318 [17:21:51<3:58:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 30328/154911 [00:50<05:48, 357.30 examples/s]"
      ]
     },
     {
@@ -116344,8 +13823,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10024/12318 [17:21:57<3:58:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10024/12318 [17:21:57<3:58:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 30574/154911 [00:51<02:41, 772.17 examples/s]"
      ]
     },
     {
@@ -116353,8 +13831,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10025/12318 [17:22:02<3:58:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10025/12318 [17:22:02<3:58:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 30678/154911 [00:51<02:36, 794.27 examples/s]"
      ]
     },
     {
@@ -116362,8 +13839,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10026/12318 [17:22:10<3:58:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10026/12318 [17:22:10<3:58:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 30779/154911 [00:51<02:56, 705.17 examples/s]"
      ]
     },
     {
@@ -116371,8 +13847,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10027/12318 [17:22:17<3:58:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10027/12318 [17:22:17<3:58:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 30865/154911 [00:51<03:24, 605.83 examples/s]"
      ]
     },
     {
@@ -116380,8 +13855,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10028/12318 [17:22:25<3:58:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10028/12318 [17:22:25<3:58:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 30941/154911 [00:51<03:32, 582.56 examples/s]"
      ]
     },
     {
@@ -116389,8 +13863,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10029/12318 [17:22:29<3:57:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10029/12318 [17:22:29<3:57:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31009/154911 [00:51<03:32, 583.21 examples/s]"
      ]
     },
     {
@@ -116398,8 +13871,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10030/12318 [17:22:37<3:57:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10030/12318 [17:22:37<3:57:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31074/154911 [00:51<03:49, 539.89 examples/s]"
      ]
     },
     {
@@ -116407,8 +13879,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10031/12318 [17:22:41<3:57:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10031/12318 [17:22:41<3:57:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31132/154911 [00:52<04:07, 500.57 examples/s]"
      ]
     },
     {
@@ -116416,8 +13887,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10032/12318 [17:22:50<3:57:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10032/12318 [17:22:50<3:57:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31187/154911 [00:52<04:03, 508.39 examples/s]"
      ]
     },
     {
@@ -116425,8 +13895,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10033/12318 [17:22:53<3:57:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10033/12318 [17:22:53<3:57:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31246/154911 [00:52<03:55, 526.00 examples/s]"
      ]
     },
     {
@@ -116434,8 +13903,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10034/12318 [17:22:58<3:57:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10034/12318 [17:22:58<3:57:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31301/154911 [00:52<03:56, 523.69 examples/s]"
      ]
     },
     {
@@ -116443,8 +13911,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10035/12318 [17:23:06<3:57:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10035/12318 [17:23:06<3:57:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31362/154911 [00:52<03:46, 546.54 examples/s]"
      ]
     },
     {
@@ -116452,8 +13919,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10036/12318 [17:23:11<3:57:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10036/12318 [17:23:11<3:57:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31439/154911 [00:52<03:26, 597.60 examples/s]"
      ]
     },
     {
@@ -116461,8 +13927,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10037/12318 [17:23:18<3:57:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10037/12318 [17:23:18<3:57:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31514/154911 [00:52<03:12, 639.38 examples/s]"
      ]
     },
     {
@@ -116470,8 +13935,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10038/12318 [17:23:23<3:56:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10038/12318 [17:23:23<3:56:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31580/154911 [00:52<03:24, 602.18 examples/s]"
      ]
     },
     {
@@ -116479,8 +13943,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  81%|▊| 10039/12318 [17:23:31<3:56:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  81%|▊| 10039/12318 [17:23:31<3:56:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31643/154911 [00:52<03:27, 592.84 examples/s]"
      ]
     },
     {
@@ -116488,8 +13951,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10040/12318 [17:23:37<3:56:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10040/12318 [17:23:37<3:56:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  20%|█▊       | 31708/154911 [00:53<03:22, 607.61 examples/s]"
      ]
     },
     {
@@ -116497,8 +13959,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10041/12318 [17:23:42<3:56:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10041/12318 [17:23:42<3:56:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 31770/154911 [00:53<03:46, 543.93 examples/s]"
      ]
     },
     {
@@ -116506,8 +13967,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10042/12318 [17:23:51<3:56:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10042/12318 [17:23:51<3:56:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 31826/154911 [00:53<04:03, 505.76 examples/s]"
      ]
     },
     {
@@ -116515,8 +13975,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10043/12318 [17:23:58<3:56:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10043/12318 [17:23:58<3:56:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 31878/154911 [00:53<04:41, 437.35 examples/s]"
      ]
     },
     {
@@ -116524,8 +13983,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10044/12318 [17:24:07<3:56:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10044/12318 [17:24:07<3:56:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 31940/154911 [00:53<04:15, 481.31 examples/s]"
      ]
     },
     {
@@ -116533,8 +13991,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|��| 10045/12318 [17:24:11<3:56:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10045/12318 [17:24:11<3:56:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 31991/154911 [00:53<04:42, 435.69 examples/s]"
      ]
     },
     {
@@ -116542,7 +13999,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10046/12318 [17:24:19<3:56:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 32037/154911 [00:53<05:04, 403.33 examples/s]"
      ]
     },
     {
@@ -116550,7 +14007,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10046/12318 [17:24:19<3:56:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 32090/154911 [00:53<04:52, 420.58 examples/s]"
      ]
     },
     {
@@ -116558,8 +14015,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10047/12318 [17:24:26<3:56:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10047/12318 [17:24:26<3:56:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 32149/154911 [00:54<04:26, 461.47 examples/s]"
      ]
     },
     {
@@ -116567,8 +14023,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10048/12318 [17:24:41<3:56:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10048/12318 [17:24:41<3:56:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 32202/154911 [00:54<04:17, 476.53 examples/s]"
      ]
     },
     {
@@ -116576,8 +14031,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10049/12318 [17:24:50<3:55:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10049/12318 [17:24:50<3:55:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▊       | 32266/154911 [00:54<03:56, 517.52 examples/s]"
      ]
     },
     {
@@ -116585,8 +14039,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10050/12318 [17:24:57<3:55:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10050/12318 [17:24:57<3:55:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 32320/154911 [00:54<03:59, 510.99 examples/s]"
      ]
     },
     {
@@ -116594,8 +14047,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10051/12318 [17:25:03<3:55:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10051/12318 [17:25:03<3:55:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 32383/154911 [00:54<03:46, 539.93 examples/s]"
      ]
     },
     {
@@ -116603,8 +14055,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10052/12318 [17:25:12<3:55:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10052/12318 [17:25:12<3:55:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 32459/154911 [00:54<03:25, 594.69 examples/s]"
      ]
     },
     {
@@ -116612,8 +14063,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10053/12318 [17:25:20<3:55:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10053/12318 [17:25:20<3:55:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 32540/154911 [00:54<03:14, 628.47 examples/s]"
      ]
     },
     {
@@ -116621,8 +14071,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10054/12318 [17:25:26<3:55:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10054/12318 [17:25:26<3:55:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 32604/154911 [00:54<03:18, 616.69 examples/s]"
      ]
     },
     {
@@ -116630,8 +14079,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10055/12318 [17:25:27<3:55:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10055/12318 [17:25:27<3:55:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 32682/154911 [00:54<03:09, 645.34 examples/s]"
      ]
     },
     {
@@ -116639,8 +14087,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10056/12318 [17:25:36<3:55:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10056/12318 [17:25:36<3:55:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 32747/154911 [00:55<03:12, 634.47 examples/s]"
      ]
     },
     {
@@ -116648,8 +14095,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10057/12318 [17:25:40<3:55:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10057/12318 [17:25:40<3:55:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 32873/154911 [00:55<02:31, 807.63 examples/s]"
      ]
     },
     {
@@ -116657,8 +14103,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10058/12318 [17:25:44<3:54:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10058/12318 [17:25:44<3:54:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 32955/154911 [00:55<04:07, 493.04 examples/s]"
      ]
     },
     {
@@ -116666,8 +14111,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10059/12318 [17:25:51<3:54:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10059/12318 [17:25:51<3:54:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 33078/154911 [00:55<03:14, 627.92 examples/s]"
      ]
     },
     {
@@ -116675,8 +14119,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10060/12318 [17:25:53<3:54:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10060/12318 [17:25:53<3:54:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 33158/154911 [00:55<03:32, 571.94 examples/s]"
      ]
     },
     {
@@ -116684,8 +14127,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10061/12318 [17:26:02<3:54:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10061/12318 [17:26:02<3:54:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 33227/154911 [00:55<03:46, 537.20 examples/s]"
      ]
     },
     {
@@ -116693,8 +14135,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10062/12318 [17:26:07<3:54:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10062/12318 [17:26:07<3:54:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  21%|█▉       | 33289/154911 [00:56<03:57, 513.07 examples/s]"
      ]
     },
     {
@@ -116702,8 +14143,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10063/12318 [17:26:11<3:54:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10063/12318 [17:26:11<3:54:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33346/154911 [00:56<04:14, 478.11 examples/s]"
      ]
     },
     {
@@ -116711,8 +14151,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10064/12318 [17:26:13<3:54:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10064/12318 [17:26:13<3:54:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33398/154911 [00:56<04:15, 475.34 examples/s]"
      ]
     },
     {
@@ -116720,8 +14159,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10065/12318 [17:26:21<3:54:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10065/12318 [17:26:21<3:54:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33450/154911 [00:56<04:10, 485.55 examples/s]"
      ]
     },
     {
@@ -116729,8 +14167,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10066/12318 [17:26:22<3:54:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10066/12318 [17:26:22<3:54:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33519/154911 [00:56<03:46, 537.04 examples/s]"
      ]
     },
     {
@@ -116738,8 +14175,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10067/12318 [17:26:27<3:53:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10067/12318 [17:26:27<3:53:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33584/154911 [00:56<03:34, 565.65 examples/s]"
      ]
     },
     {
@@ -116747,8 +14183,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10068/12318 [17:26:35<3:53:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10068/12318 [17:26:35<3:53:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33649/154911 [00:56<03:30, 576.12 examples/s]"
      ]
     },
     {
@@ -116756,8 +14191,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10069/12318 [17:26:40<3:53:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10069/12318 [17:26:40<3:53:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33709/154911 [00:56<03:36, 560.59 examples/s]"
      ]
     },
     {
@@ -116765,8 +14199,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10070/12318 [17:26:49<3:53:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10070/12318 [17:26:49<3:53:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33769/154911 [00:56<03:32, 570.41 examples/s]"
      ]
     },
     {
@@ -116774,8 +14207,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10071/12318 [17:26:53<3:53:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10071/12318 [17:26:53<3:53:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33828/154911 [00:57<03:37, 555.56 examples/s]"
      ]
     },
     {
@@ -116783,8 +14215,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10072/12318 [17:27:02<3:53:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10072/12318 [17:27:02<3:53:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33896/154911 [00:57<03:25, 589.37 examples/s]"
      ]
     },
     {
@@ -116792,8 +14223,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10073/12318 [17:27:07<3:53:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10073/12318 [17:27:07<3:53:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 33966/154911 [00:57<03:16, 616.02 examples/s]"
      ]
     },
     {
@@ -116801,8 +14231,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10074/12318 [17:27:11<3:53:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10074/12318 [17:27:11<3:53:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 34042/154911 [00:57<03:06, 649.32 examples/s]"
      ]
     },
     {
@@ -116810,8 +14239,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10075/12318 [17:27:18<3:53:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10075/12318 [17:27:18<3:53:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 34109/154911 [00:57<03:17, 612.46 examples/s]"
      ]
     },
     {
@@ -116819,8 +14247,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10076/12318 [17:27:25<3:53:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10076/12318 [17:27:25<3:53:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 34171/154911 [00:57<03:21, 598.16 examples/s]"
      ]
     },
     {
@@ -116828,8 +14255,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10077/12318 [17:27:30<3:52:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10077/12318 [17:27:30<3:52:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 34232/154911 [00:57<03:34, 562.30 examples/s]"
      ]
     },
     {
@@ -116837,8 +14263,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10078/12318 [17:27:38<3:52:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10078/12318 [17:27:38<3:52:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 34301/154911 [00:57<03:23, 591.97 examples/s]"
      ]
     },
     {
@@ -116846,8 +14271,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10079/12318 [17:27:42<3:52:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10079/12318 [17:27:42<3:52:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|█▉       | 34376/154911 [00:57<03:10, 631.22 examples/s]"
      ]
     },
     {
@@ -116855,8 +14279,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10080/12318 [17:27:59<3:52:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10080/12318 [17:27:59<3:52:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|██       | 34493/154911 [00:58<02:34, 777.23 examples/s]"
      ]
     },
     {
@@ -116864,8 +14287,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10081/12318 [17:28:02<3:52:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10081/12318 [17:28:02<3:52:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|██       | 34572/154911 [00:58<02:36, 770.60 examples/s]"
      ]
     },
     {
@@ -116873,8 +14295,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10082/12318 [17:28:10<3:52:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10082/12318 [17:28:10<3:52:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|██       | 34653/154911 [00:58<02:33, 781.58 examples/s]"
      ]
     },
     {
@@ -116882,8 +14303,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10083/12318 [17:28:15<3:52:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10083/12318 [17:28:15<3:52:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|██       | 34732/154911 [00:58<02:44, 730.51 examples/s]"
      ]
     },
     {
@@ -116891,8 +14311,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10084/12318 [17:28:20<3:52:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10084/12318 [17:28:20<3:52:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  22%|██       | 34807/154911 [00:58<03:02, 658.86 examples/s]"
      ]
     },
     {
@@ -116900,7 +14319,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10085/12318 [17:28:28<3:52:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 34879/154911 [00:58<02:59, 670.03 examples/s]"
      ]
     },
     {
@@ -116908,7 +14327,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10085/12318 [17:28:28<3:52:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 34949/154911 [00:58<03:10, 629.45 examples/s]"
      ]
     },
     {
@@ -116916,8 +14335,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10086/12318 [17:28:37<3:52:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10086/12318 [17:28:37<3:52:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35014/154911 [00:58<03:12, 623.24 examples/s]"
      ]
     },
     {
@@ -116925,8 +14343,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10087/12318 [17:28:43<3:51:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10087/12318 [17:28:43<3:51:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35102/154911 [00:58<02:59, 665.75 examples/s]"
      ]
     },
     {
@@ -116934,8 +14351,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10088/12318 [17:28:52<3:51:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10088/12318 [17:28:52<3:51:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35170/154911 [00:59<03:00, 662.86 examples/s]"
      ]
     },
     {
@@ -116943,8 +14359,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10089/12318 [17:29:00<3:51:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10089/12318 [17:29:00<3:51:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35252/154911 [00:59<02:51, 696.21 examples/s]"
      ]
     },
     {
@@ -116952,8 +14367,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10090/12318 [17:29:05<3:51:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10090/12318 [17:29:05<3:51:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35323/154911 [00:59<02:56, 676.97 examples/s]"
      ]
     },
     {
@@ -116961,8 +14375,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10091/12318 [17:29:10<3:51:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10091/12318 [17:29:10<3:51:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35392/154911 [00:59<02:59, 666.99 examples/s]"
      ]
     },
     {
@@ -116970,8 +14383,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10092/12318 [17:29:18<3:51:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10092/12318 [17:29:18<3:51:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35469/154911 [00:59<02:53, 690.03 examples/s]"
      ]
     },
     {
@@ -116979,8 +14391,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10093/12318 [17:29:25<3:51:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10093/12318 [17:29:25<3:51:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35541/154911 [00:59<02:56, 677.13 examples/s]"
      ]
     },
     {
@@ -116988,8 +14399,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10094/12318 [17:29:31<3:51:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10094/12318 [17:29:31<3:51:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35609/154911 [00:59<03:07, 636.72 examples/s]"
      ]
     },
     {
@@ -116997,8 +14407,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10095/12318 [17:29:34<3:51:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10095/12318 [17:29:34<3:51:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35674/154911 [00:59<03:12, 620.83 examples/s]"
      ]
     },
     {
@@ -117006,8 +14415,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10096/12318 [17:29:37<3:51:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10096/12318 [17:29:37<3:51:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35738/154911 [00:59<03:28, 571.62 examples/s]"
      ]
     },
     {
@@ -117015,8 +14423,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10097/12318 [17:29:42<3:50:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10097/12318 [17:29:42<3:50:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35796/154911 [01:00<03:34, 554.59 examples/s]"
      ]
     },
     {
@@ -117024,8 +14431,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10098/12318 [17:29:48<3:50:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10098/12318 [17:29:48<3:50:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35852/154911 [01:00<03:47, 523.84 examples/s]"
      ]
     },
     {
@@ -117033,8 +14439,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10099/12318 [17:29:55<3:50:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10099/12318 [17:29:55<3:50:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35907/154911 [01:00<04:26, 445.88 examples/s]"
      ]
     },
     {
@@ -117042,8 +14447,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10100/12318 [17:29:58<3:50:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10100/12318 [17:29:58<3:50:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 35967/154911 [01:00<04:11, 473.73 examples/s]"
      ]
     },
     {
@@ -117051,8 +14455,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10101/12318 [17:30:01<3:50:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10101/12318 [17:30:01<3:50:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 36017/154911 [01:00<04:20, 456.82 examples/s]"
      ]
     },
     {
@@ -117060,8 +14463,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10102/12318 [17:30:07<3:50:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10102/12318 [17:30:07<3:50:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 36086/154911 [01:00<03:51, 512.44 examples/s]"
      ]
     },
     {
@@ -117069,8 +14471,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10103/12318 [17:30:16<3:50:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10103/12318 [17:30:16<3:50:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 36148/154911 [01:00<03:40, 539.44 examples/s]"
      ]
     },
     {
@@ -117078,8 +14479,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10104/12318 [17:30:25<3:50:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10104/12318 [17:30:25<3:50:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 36204/154911 [01:00<03:50, 515.05 examples/s]"
      ]
     },
     {
@@ -117087,8 +14487,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10105/12318 [17:30:34<3:50:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10105/12318 [17:30:34<3:50:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 36257/154911 [01:01<04:08, 476.61 examples/s]"
      ]
     },
     {
@@ -117096,8 +14495,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10106/12318 [17:30:43<3:49:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10106/12318 [17:30:43<3:49:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 36309/154911 [01:01<04:18, 459.28 examples/s]"
      ]
     },
     {
@@ -117105,8 +14503,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10107/12318 [17:30:48<3:49:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10107/12318 [17:30:48<3:49:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 36356/154911 [01:01<04:23, 449.12 examples/s]"
      ]
     },
     {
@@ -117114,8 +14511,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10108/12318 [17:30:53<3:49:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10108/12318 [17:30:53<3:49:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  23%|██       | 36402/154911 [01:01<04:24, 447.41 examples/s]"
      ]
     },
     {
@@ -117123,8 +14519,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10109/12318 [17:31:00<3:49:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10109/12318 [17:31:00<3:49:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██       | 36462/154911 [01:01<04:10, 472.15 examples/s]"
      ]
     },
     {
@@ -117132,8 +14527,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10110/12318 [17:31:06<3:49:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10110/12318 [17:31:06<3:49:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██       | 36550/154911 [01:01<03:23, 581.44 examples/s]"
      ]
     },
     {
@@ -117141,8 +14535,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10111/12318 [17:31:11<3:49:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10111/12318 [17:31:11<3:49:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 36615/154911 [01:01<03:18, 595.92 examples/s]"
      ]
     },
     {
@@ -117150,8 +14543,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10112/12318 [17:31:28<3:49:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10112/12318 [17:31:28<3:49:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 36696/154911 [01:01<03:00, 654.69 examples/s]"
      ]
     },
     {
@@ -117159,8 +14551,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10113/12318 [17:31:35<3:49:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10113/12318 [17:31:35<3:49:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 36764/154911 [01:01<03:05, 636.55 examples/s]"
      ]
     },
     {
@@ -117168,8 +14559,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10114/12318 [17:31:44<3:49:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10114/12318 [17:31:44<3:49:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 36829/154911 [01:02<03:23, 581.10 examples/s]"
      ]
     },
     {
@@ -117177,8 +14567,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10115/12318 [17:31:48<3:49:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10115/12318 [17:31:48<3:49:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 36889/154911 [01:02<03:41, 533.77 examples/s]"
      ]
     },
     {
@@ -117186,8 +14575,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10116/12318 [17:31:57<3:48:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10116/12318 [17:31:57<3:48:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 36944/154911 [01:02<03:39, 536.68 examples/s]"
      ]
     },
     {
@@ -117195,8 +14583,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10117/12318 [17:31:59<3:48:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10117/12318 [17:31:59<3:48:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37007/154911 [01:02<03:30, 559.10 examples/s]"
      ]
     },
     {
@@ -117204,7 +14591,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10118/12318 [17:32:07<3:48:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37065/154911 [01:02<03:33, 552.83 examples/s]"
      ]
     },
     {
@@ -117212,7 +14599,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10118/12318 [17:32:07<3:48:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37138/154911 [01:02<03:16, 599.65 examples/s]"
      ]
     },
     {
@@ -117220,8 +14607,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10119/12318 [17:32:16<3:48:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10119/12318 [17:32:16<3:48:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37216/154911 [01:02<03:11, 615.76 examples/s]"
      ]
     },
     {
@@ -117229,8 +14615,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10120/12318 [17:32:18<3:48:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10120/12318 [17:32:18<3:48:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37286/154911 [01:02<03:06, 630.09 examples/s]"
      ]
     },
     {
@@ -117238,8 +14623,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10121/12318 [17:32:21<3:48:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10121/12318 [17:32:21<3:48:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37363/154911 [01:02<02:58, 658.09 examples/s]"
      ]
     },
     {
@@ -117247,8 +14631,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10122/12318 [17:32:26<3:48:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10122/12318 [17:32:26<3:48:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37435/154911 [01:03<02:54, 673.45 examples/s]"
      ]
     },
     {
@@ -117256,8 +14639,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10123/12318 [17:32:32<3:48:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10123/12318 [17:32:32<3:48:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37503/154911 [01:03<03:00, 649.20 examples/s]"
      ]
     },
     {
@@ -117265,8 +14647,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10124/12318 [17:32:39<3:48:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10124/12318 [17:32:39<3:48:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37572/154911 [01:03<03:01, 647.69 examples/s]"
      ]
     },
     {
@@ -117274,8 +14655,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10125/12318 [17:32:40<3:48:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10125/12318 [17:32:40<3:48:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37637/154911 [01:03<03:13, 606.82 examples/s]"
      ]
     },
     {
@@ -117283,7 +14663,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10126/12318 [17:32:44<3:47:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37701/154911 [01:03<03:12, 609.30 examples/s]"
      ]
     },
     {
@@ -117291,7 +14671,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10126/12318 [17:32:44<3:47:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37781/154911 [01:03<02:57, 660.28 examples/s]"
      ]
     },
     {
@@ -117299,7 +14679,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10127/12318 [17:32:48<3:47:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37852/154911 [01:03<02:54, 671.94 examples/s]"
      ]
     },
     {
@@ -117307,7 +14687,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10127/12318 [17:32:48<3:47:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  24%|██▏      | 37928/154911 [01:03<02:48, 695.47 examples/s]"
      ]
     },
     {
@@ -117315,8 +14695,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10128/12318 [17:32:57<3:47:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10128/12318 [17:32:57<3:47:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38006/154911 [01:03<02:43, 714.62 examples/s]"
      ]
     },
     {
@@ -117324,7 +14703,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10129/12318 [17:33:04<3:47:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38079/154911 [01:04<02:58, 655.00 examples/s]"
      ]
     },
     {
@@ -117332,7 +14711,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10129/12318 [17:33:04<3:47:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38158/154911 [01:04<02:49, 690.63 examples/s]"
      ]
     },
     {
@@ -117340,8 +14719,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10130/12318 [17:33:11<3:47:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10130/12318 [17:33:11<3:47:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38229/154911 [01:04<02:55, 664.40 examples/s]"
      ]
     },
     {
@@ -117349,8 +14727,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10131/12318 [17:33:20<3:47:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10131/12318 [17:33:20<3:47:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38298/154911 [01:04<02:54, 669.19 examples/s]"
      ]
     },
     {
@@ -117358,8 +14735,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10132/12318 [17:33:23<3:47:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10132/12318 [17:33:23<3:47:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38366/154911 [01:04<02:56, 659.15 examples/s]"
      ]
     },
     {
@@ -117367,8 +14743,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10133/12318 [17:33:26<3:47:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10133/12318 [17:33:26<3:47:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38433/154911 [01:04<04:26, 436.46 examples/s]"
      ]
     },
     {
@@ -117376,8 +14751,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10134/12318 [17:33:35<3:47:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10134/12318 [17:33:35<3:47:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38566/154911 [01:04<03:08, 616.36 examples/s]"
      ]
     },
     {
@@ -117385,8 +14759,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10135/12318 [17:33:39<3:46:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10135/12318 [17:33:39<3:46:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38641/154911 [01:04<03:23, 570.54 examples/s]"
      ]
     },
     {
@@ -117394,8 +14767,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10136/12318 [17:33:44<3:46:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10136/12318 [17:33:44<3:46:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▏      | 38708/154911 [01:05<03:40, 526.59 examples/s]"
      ]
     },
     {
@@ -117403,8 +14775,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10137/12318 [17:33:46<3:46:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10137/12318 [17:33:46<3:46:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 38768/154911 [01:05<03:47, 510.92 examples/s]"
      ]
     },
     {
@@ -117412,8 +14783,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10138/12318 [17:33:50<3:46:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10138/12318 [17:33:50<3:46:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 38825/154911 [01:05<03:59, 485.05 examples/s]"
      ]
     },
     {
@@ -117421,8 +14791,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10139/12318 [17:33:55<3:46:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10139/12318 [17:33:55<3:46:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 38877/154911 [01:05<04:10, 463.44 examples/s]"
      ]
     },
     {
@@ -117430,8 +14799,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10140/12318 [17:34:04<3:46:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10140/12318 [17:34:04<3:46:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 38943/154911 [01:05<03:50, 504.00 examples/s]"
      ]
     },
     {
@@ -117439,8 +14807,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10141/12318 [17:34:07<3:46:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10141/12318 [17:34:07<3:46:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 38996/154911 [01:05<04:04, 474.12 examples/s]"
      ]
     },
     {
@@ -117448,8 +14815,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10142/12318 [17:34:15<3:46:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10142/12318 [17:34:15<3:46:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 39046/154911 [01:05<04:01, 478.81 examples/s]"
      ]
     },
     {
@@ -117457,8 +14823,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10143/12318 [17:34:17<3:46:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10143/12318 [17:34:17<3:46:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 39096/154911 [01:05<04:15, 453.11 examples/s]"
      ]
     },
     {
@@ -117466,8 +14831,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10144/12318 [17:34:41<3:46:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10144/12318 [17:34:41<3:46:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 39161/154911 [01:06<04:00, 481.18 examples/s]"
      ]
     },
     {
@@ -117475,8 +14839,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10145/12318 [17:34:43<3:45:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10145/12318 [17:34:43<3:45:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 39211/154911 [01:06<08:16, 233.01 examples/s]"
      ]
     },
     {
@@ -117484,8 +14847,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10146/12318 [17:34:51<3:45:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10146/12318 [17:34:51<3:45:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 39334/154911 [01:06<05:01, 383.45 examples/s]"
      ]
     },
     {
@@ -117493,8 +14855,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10147/12318 [17:35:00<3:45:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10147/12318 [17:35:00<3:45:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 39397/154911 [01:06<04:56, 389.08 examples/s]"
      ]
     },
     {
@@ -117502,8 +14863,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10148/12318 [17:35:04<3:45:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10148/12318 [17:35:04<3:45:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  25%|██▎      | 39453/154911 [01:06<04:41, 410.13 examples/s]"
      ]
     },
     {
@@ -117511,8 +14871,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10149/12318 [17:35:09<3:45:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10149/12318 [17:35:09<3:45:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 39507/154911 [01:07<06:02, 318.35 examples/s]"
      ]
     },
     {
@@ -117520,8 +14879,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10150/12318 [17:35:16<3:45:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10150/12318 [17:35:16<3:45:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 39595/154911 [01:07<04:36, 416.48 examples/s]"
      ]
     },
     {
@@ -117529,8 +14887,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10151/12318 [17:35:23<3:45:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10151/12318 [17:35:23<3:45:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 39652/154911 [01:07<04:41, 410.03 examples/s]"
      ]
     },
     {
@@ -117538,8 +14895,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10152/12318 [17:35:32<3:45:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10152/12318 [17:35:32<3:45:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 39710/154911 [01:07<04:19, 443.54 examples/s]"
      ]
     },
     {
@@ -117547,8 +14903,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10153/12318 [17:35:36<3:45:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10153/12318 [17:35:36<3:45:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 39763/154911 [01:07<04:18, 445.97 examples/s]"
      ]
     },
     {
@@ -117556,8 +14911,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10154/12318 [17:35:43<3:44:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10154/12318 [17:35:43<3:44:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 39862/154911 [01:07<03:19, 576.23 examples/s]"
      ]
     },
     {
@@ -117565,7 +14919,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10155/12318 [17:35:48<3:44:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 39928/154911 [01:07<03:29, 549.61 examples/s]"
      ]
     },
     {
@@ -117573,7 +14927,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10155/12318 [17:35:48<3:44:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 39991/154911 [01:08<03:28, 550.01 examples/s]"
      ]
     },
     {
@@ -117581,8 +14935,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10156/12318 [17:35:50<3:44:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10156/12318 [17:35:50<3:44:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40050/154911 [01:08<03:37, 527.44 examples/s]"
      ]
     },
     {
@@ -117590,8 +14943,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10157/12318 [17:35:56<3:44:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10157/12318 [17:35:56<3:44:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40106/154911 [01:08<04:00, 477.22 examples/s]"
      ]
     },
     {
@@ -117599,8 +14951,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10158/12318 [17:36:03<3:44:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10158/12318 [17:36:03<3:44:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40157/154911 [01:08<04:00, 476.51 examples/s]"
      ]
     },
     {
@@ -117608,8 +14959,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10159/12318 [17:36:08<3:44:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10159/12318 [17:36:08<3:44:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40211/154911 [01:08<03:56, 484.22 examples/s]"
      ]
     },
     {
@@ -117617,8 +14967,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10160/12318 [17:36:14<3:44:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10160/12318 [17:36:14<3:44:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40267/154911 [01:08<03:56, 484.14 examples/s]"
      ]
     },
     {
@@ -117626,8 +14975,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10161/12318 [17:36:19<3:44:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10161/12318 [17:36:19<3:44:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40325/154911 [01:08<03:46, 506.61 examples/s]"
      ]
     },
     {
@@ -117635,8 +14983,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  82%|▊| 10162/12318 [17:36:22<3:44:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  82%|▊| 10162/12318 [17:36:22<3:44:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40380/154911 [01:08<03:41, 516.22 examples/s]"
      ]
     },
     {
@@ -117644,8 +14991,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10163/12318 [17:36:27<3:44:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10163/12318 [17:36:27<3:44:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40434/154911 [01:09<03:40, 518.81 examples/s]"
      ]
     },
     {
@@ -117653,8 +14999,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10164/12318 [17:36:35<3:43:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10164/12318 [17:36:35<3:43:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40487/154911 [01:09<03:55, 486.57 examples/s]"
      ]
     },
     {
@@ -117662,8 +15007,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10165/12318 [17:36:36<3:43:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10165/12318 [17:36:36<3:43:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40537/154911 [01:09<03:58, 480.09 examples/s]"
      ]
     },
     {
@@ -117671,8 +15015,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10166/12318 [17:36:41<3:43:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10166/12318 [17:36:41<3:43:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40586/154911 [01:09<04:00, 475.90 examples/s]"
      ]
     },
     {
@@ -117680,8 +15023,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10167/12318 [17:36:45<3:43:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10167/12318 [17:36:45<3:43:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40634/154911 [01:09<04:15, 446.66 examples/s]"
      ]
     },
     {
@@ -117689,8 +15031,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10168/12318 [17:36:47<3:43:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10168/12318 [17:36:47<3:43:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40680/154911 [01:09<04:16, 445.48 examples/s]"
      ]
     },
     {
@@ -117698,8 +15039,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10169/12318 [17:36:53<3:43:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10169/12318 [17:36:53<3:43:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40726/154911 [01:09<04:14, 448.28 examples/s]"
      ]
     },
     {
@@ -117707,8 +15047,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10170/12318 [17:37:02<3:43:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10170/12318 [17:37:02<3:43:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40772/154911 [01:09<04:25, 429.61 examples/s]"
      ]
     },
     {
@@ -117716,8 +15055,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10171/12318 [17:37:04<3:43:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10171/12318 [17:37:04<3:43:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▎      | 40818/154911 [01:09<04:26, 428.78 examples/s]"
      ]
     },
     {
@@ -117725,8 +15063,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10172/12318 [17:37:09<3:43:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10172/12318 [17:37:09<3:43:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▍      | 40893/154911 [01:09<03:39, 518.73 examples/s]"
      ]
     },
     {
@@ -117734,8 +15071,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10173/12318 [17:37:18<3:42:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10173/12318 [17:37:18<3:42:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▍      | 40950/154911 [01:10<03:36, 526.44 examples/s]"
      ]
     },
     {
@@ -117743,8 +15079,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10174/12318 [17:37:25<3:42:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10174/12318 [17:37:25<3:42:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  26%|██▍      | 41021/154911 [01:10<03:18, 573.71 examples/s]"
      ]
     },
     {
@@ -117752,8 +15087,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10175/12318 [17:37:33<3:42:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10175/12318 [17:37:33<3:42:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41103/154911 [01:10<02:57, 641.50 examples/s]"
      ]
     },
     {
@@ -117761,8 +15095,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10176/12318 [17:37:54<3:42:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10176/12318 [17:37:54<3:42:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41173/154911 [01:10<02:53, 653.72 examples/s]"
      ]
     },
     {
@@ -117770,8 +15103,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10177/12318 [17:37:59<3:42:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10177/12318 [17:37:59<3:42:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41239/154911 [01:10<03:22, 561.81 examples/s]"
      ]
     },
     {
@@ -117779,8 +15111,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10178/12318 [17:38:06<3:42:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10178/12318 [17:38:06<3:42:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41312/154911 [01:10<03:10, 595.68 examples/s]"
      ]
     },
     {
@@ -117788,8 +15119,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10179/12318 [17:38:10<3:42:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10179/12318 [17:38:10<3:42:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41380/154911 [01:10<03:11, 592.91 examples/s]"
      ]
     },
     {
@@ -117797,8 +15127,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10180/12318 [17:38:18<3:42:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10180/12318 [17:38:18<3:42:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41441/154911 [01:10<03:15, 581.75 examples/s]"
      ]
     },
     {
@@ -117806,8 +15135,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10181/12318 [17:38:27<3:42:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10181/12318 [17:38:27<3:42:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41502/154911 [01:11<03:22, 559.21 examples/s]"
      ]
     },
     {
@@ -117815,8 +15143,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10182/12318 [17:38:36<3:42:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10182/12318 [17:38:36<3:42:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41560/154911 [01:11<03:25, 550.80 examples/s]"
      ]
     },
     {
@@ -117824,8 +15151,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10183/12318 [17:38:40<3:41:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10183/12318 [17:38:40<3:41:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41621/154911 [01:11<03:20, 565.86 examples/s]"
      ]
     },
     {
@@ -117833,8 +15159,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10184/12318 [17:38:44<3:41:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10184/12318 [17:38:44<3:41:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41679/154911 [01:11<03:24, 554.49 examples/s]"
      ]
     },
     {
@@ -117842,8 +15167,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10185/12318 [17:38:45<3:41:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10185/12318 [17:38:45<3:41:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41742/154911 [01:11<03:19, 566.89 examples/s]"
      ]
     },
     {
@@ -117851,8 +15175,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10186/12318 [17:38:54<3:41:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10186/12318 [17:38:54<3:41:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41799/154911 [01:11<03:19, 565.66 examples/s]"
      ]
     },
     {
@@ -117860,8 +15183,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10187/12318 [17:39:03<3:41:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10187/12318 [17:39:03<3:41:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41869/154911 [01:11<03:07, 602.46 examples/s]"
      ]
     },
     {
@@ -117869,8 +15191,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10188/12318 [17:39:07<3:41:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10188/12318 [17:39:07<3:41:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 41930/154911 [01:11<03:20, 562.57 examples/s]"
      ]
     },
     {
@@ -117878,8 +15199,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10189/12318 [17:39:13<3:41:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10189/12318 [17:39:13<3:41:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 42023/154911 [01:11<02:50, 663.37 examples/s]"
      ]
     },
     {
@@ -117887,8 +15207,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10190/12318 [17:39:15<3:41:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10190/12318 [17:39:15<3:41:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 42091/154911 [01:11<02:51, 659.66 examples/s]"
      ]
     },
     {
@@ -117896,8 +15215,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10191/12318 [17:39:24<3:41:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10191/12318 [17:39:24<3:41:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 42158/154911 [01:12<02:50, 660.78 examples/s]"
      ]
     },
     {
@@ -117905,8 +15223,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10192/12318 [17:39:32<3:41:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10192/12318 [17:39:32<3:41:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 42226/154911 [01:12<02:49, 665.09 examples/s]"
      ]
     },
     {
@@ -117914,8 +15231,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10193/12318 [17:39:37<3:40:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10193/12318 [17:39:37<3:40:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 42293/154911 [01:12<03:44, 500.58 examples/s]"
      ]
     },
     {
@@ -117923,8 +15239,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10194/12318 [17:39:38<3:40:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10194/12318 [17:39:38<3:40:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 42401/154911 [01:12<03:02, 615.07 examples/s]"
      ]
     },
     {
@@ -117932,8 +15247,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10195/12318 [17:39:40<3:40:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10195/12318 [17:39:40<3:40:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 42473/154911 [01:12<02:55, 640.69 examples/s]"
      ]
     },
     {
@@ -117941,8 +15255,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10196/12318 [17:39:47<3:40:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10196/12318 [17:39:47<3:40:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  27%|██▍      | 42542/154911 [01:12<02:57, 633.47 examples/s]"
      ]
     },
     {
@@ -117950,8 +15263,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10197/12318 [17:39:54<3:40:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10197/12318 [17:39:54<3:40:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▍      | 42630/154911 [01:12<02:54, 645.20 examples/s]"
      ]
     },
     {
@@ -117959,8 +15271,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10198/12318 [17:39:56<3:40:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10198/12318 [17:39:56<3:40:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▍      | 42698/154911 [01:12<02:57, 631.54 examples/s]"
      ]
     },
     {
@@ -117968,8 +15279,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10199/12318 [17:39:59<3:40:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10199/12318 [17:39:59<3:40:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▍      | 42768/154911 [01:13<02:52, 649.09 examples/s]"
      ]
     },
     {
@@ -117977,8 +15287,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10200/12318 [17:40:04<3:40:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10200/12318 [17:40:04<3:40:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▍      | 42837/154911 [01:13<02:56, 633.24 examples/s]"
      ]
     },
     {
@@ -117986,8 +15295,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10201/12318 [17:40:09<3:40:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10201/12318 [17:40:09<3:40:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▍      | 42902/154911 [01:13<03:19, 561.36 examples/s]"
      ]
     },
     {
@@ -117995,8 +15303,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10202/12318 [17:40:13<3:39:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10202/12318 [17:40:13<3:39:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▍      | 42960/154911 [01:13<03:24, 546.54 examples/s]"
      ]
     },
     {
@@ -118004,8 +15311,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10203/12318 [17:40:19<3:39:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10203/12318 [17:40:19<3:39:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▍      | 43027/154911 [01:13<03:26, 540.84 examples/s]"
      ]
     },
     {
@@ -118013,8 +15319,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10204/12318 [17:40:24<3:39:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10204/12318 [17:40:24<3:39:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43084/154911 [01:13<03:24, 547.72 examples/s]"
      ]
     },
     {
@@ -118022,8 +15327,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10205/12318 [17:40:32<3:39:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10205/12318 [17:40:32<3:39:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43141/154911 [01:13<03:21, 553.70 examples/s]"
      ]
     },
     {
@@ -118031,8 +15335,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10206/12318 [17:40:36<3:39:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10206/12318 [17:40:36<3:39:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43198/154911 [01:13<03:25, 543.05 examples/s]"
      ]
     },
     {
@@ -118040,8 +15343,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10207/12318 [17:40:45<3:39:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10207/12318 [17:40:45<3:39:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43253/154911 [01:13<03:28, 535.36 examples/s]"
      ]
     },
     {
@@ -118049,8 +15351,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10208/12318 [17:41:15<3:39:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10208/12318 [17:41:15<3:39:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43310/154911 [01:14<03:25, 543.28 examples/s]"
      ]
     },
     {
@@ -118058,8 +15359,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10209/12318 [17:41:19<3:39:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10209/12318 [17:41:19<3:39:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43365/154911 [01:14<03:27, 537.05 examples/s]"
      ]
     },
     {
@@ -118067,8 +15367,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10210/12318 [17:41:24<3:39:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10210/12318 [17:41:24<3:39:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43423/154911 [01:14<03:23, 548.27 examples/s]"
      ]
     },
     {
@@ -118076,8 +15375,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10211/12318 [17:41:26<3:39:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10211/12318 [17:41:26<3:39:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43478/154911 [01:14<03:39, 508.66 examples/s]"
      ]
     },
     {
@@ -118085,8 +15383,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10212/12318 [17:41:30<3:38:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10212/12318 [17:41:30<3:38:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43535/154911 [01:14<03:33, 522.74 examples/s]"
      ]
     },
     {
@@ -118094,8 +15391,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10213/12318 [17:41:39<3:38:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10213/12318 [17:41:39<3:38:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43588/154911 [01:14<03:32, 523.53 examples/s]"
      ]
     },
     {
@@ -118103,8 +15399,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10214/12318 [17:41:43<3:38:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10214/12318 [17:41:43<3:38:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43655/154911 [01:14<03:19, 557.19 examples/s]"
      ]
     },
     {
@@ -118112,8 +15407,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10215/12318 [17:41:50<3:38:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10215/12318 [17:41:50<3:38:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43734/154911 [01:14<02:59, 618.55 examples/s]"
      ]
     },
     {
@@ -118121,8 +15415,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10216/12318 [17:41:51<3:38:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10216/12318 [17:41:51<3:38:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43797/154911 [01:15<08:46, 210.86 examples/s]"
      ]
     },
     {
@@ -118130,8 +15423,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10217/12318 [17:41:58<3:38:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10217/12318 [17:41:58<3:38:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 43990/154911 [01:15<04:22, 421.99 examples/s]"
      ]
     },
     {
@@ -118139,8 +15431,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10218/12318 [17:42:06<3:38:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10218/12318 [17:42:06<3:38:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  28%|██▌      | 44080/154911 [01:15<04:08, 445.28 examples/s]"
      ]
     },
     {
@@ -118148,8 +15439,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10219/12318 [17:42:09<3:38:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10219/12318 [17:42:09<3:38:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44159/154911 [01:16<04:24, 418.08 examples/s]"
      ]
     },
     {
@@ -118157,7 +15447,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10220/12318 [17:42:18<3:38:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44225/154911 [01:16<04:26, 415.69 examples/s]"
      ]
     },
     {
@@ -118165,7 +15455,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10220/12318 [17:42:18<3:38:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44284/154911 [01:16<04:37, 399.17 examples/s]"
      ]
     },
     {
@@ -118173,8 +15463,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10221/12318 [17:42:19<3:37:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10221/12318 [17:42:19<3:37:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44340/154911 [01:16<04:19, 426.47 examples/s]"
      ]
     },
     {
@@ -118182,8 +15471,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10222/12318 [17:42:28<3:37:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10222/12318 [17:42:28<3:37:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44405/154911 [01:16<03:55, 468.45 examples/s]"
      ]
     },
     {
@@ -118191,8 +15479,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10223/12318 [17:42:34<3:37:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10223/12318 [17:42:34<3:37:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44461/154911 [01:16<03:59, 461.41 examples/s]"
      ]
     },
     {
@@ -118200,8 +15487,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10224/12318 [17:42:40<3:37:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10224/12318 [17:42:40<3:37:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44514/154911 [01:16<03:58, 463.62 examples/s]"
      ]
     },
     {
@@ -118209,8 +15495,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10225/12318 [17:42:47<3:37:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10225/12318 [17:42:47<3:37:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44567/154911 [01:16<03:57, 465.17 examples/s]"
      ]
     },
     {
@@ -118218,8 +15503,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10226/12318 [17:42:49<3:37:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10226/12318 [17:42:49<3:37:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44640/154911 [01:17<03:27, 530.19 examples/s]"
      ]
     },
     {
@@ -118227,8 +15511,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10227/12318 [17:42:57<3:37:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10227/12318 [17:42:57<3:37:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44710/154911 [01:17<03:12, 571.43 examples/s]"
      ]
     },
     {
@@ -118236,8 +15519,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10228/12318 [17:43:00<3:37:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10228/12318 [17:43:00<3:37:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44806/154911 [01:17<02:42, 677.34 examples/s]"
      ]
     },
     {
@@ -118245,8 +15527,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10229/12318 [17:43:06<3:37:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10229/12318 [17:43:06<3:37:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44901/154911 [01:17<02:26, 751.38 examples/s]"
      ]
     },
     {
@@ -118254,8 +15535,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10230/12318 [17:43:11<3:37:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10230/12318 [17:43:11<3:37:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 44991/154911 [01:17<02:18, 792.90 examples/s]"
      ]
     },
     {
@@ -118263,8 +15543,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10231/12318 [17:43:15<3:36:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10231/12318 [17:43:15<3:36:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 45075/154911 [01:17<02:17, 798.08 examples/s]"
      ]
     },
     {
@@ -118272,8 +15551,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10232/12318 [17:43:20<3:36:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10232/12318 [17:43:20<3:36:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▌      | 45157/154911 [01:17<02:49, 648.26 examples/s]"
      ]
     },
     {
@@ -118281,8 +15559,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10233/12318 [17:43:22<3:36:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10233/12318 [17:43:22<3:36:39,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▋      | 45228/154911 [01:17<02:57, 616.56 examples/s]"
      ]
     },
     {
@@ -118290,8 +15567,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10234/12318 [17:43:29<3:36:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10234/12318 [17:43:29<3:36:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▋      | 45295/154911 [01:18<03:11, 572.50 examples/s]"
      ]
     },
     {
@@ -118299,8 +15575,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10235/12318 [17:43:34<3:36:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10235/12318 [17:43:34<3:36:27,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▋      | 45357/154911 [01:18<03:24, 534.41 examples/s]"
      ]
     },
     {
@@ -118308,8 +15583,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10236/12318 [17:43:36<3:36:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10236/12318 [17:43:36<3:36:20,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▋      | 45431/154911 [01:18<03:07, 582.89 examples/s]"
      ]
     },
     {
@@ -118317,8 +15591,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10237/12318 [17:43:40<3:36:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10237/12318 [17:43:40<3:36:13,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▋      | 45492/154911 [01:18<03:06, 587.90 examples/s]"
      ]
     },
     {
@@ -118326,8 +15599,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10238/12318 [17:43:45<3:36:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10238/12318 [17:43:45<3:36:07,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▋      | 45563/154911 [01:18<03:01, 602.98 examples/s]"
      ]
     },
     {
@@ -118335,8 +15607,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10239/12318 [17:43:51<3:36:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10239/12318 [17:43:51<3:36:00,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▋      | 45625/154911 [01:18<03:06, 584.71 examples/s]"
      ]
     },
     {
@@ -118344,8 +15615,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10240/12318 [17:44:34<3:36:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10240/12318 [17:44:34<3:36:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  29%|██▋      | 45685/154911 [01:18<03:10, 571.92 examples/s]"
      ]
     },
     {
@@ -118353,8 +15623,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10241/12318 [17:44:39<3:35:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10241/12318 [17:44:39<3:35:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 45756/154911 [01:18<02:59, 608.77 examples/s]"
      ]
     },
     {
@@ -118362,8 +15631,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10242/12318 [17:44:44<3:35:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10242/12318 [17:44:44<3:35:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 45818/154911 [01:18<03:15, 559.34 examples/s]"
      ]
     },
     {
@@ -118371,8 +15639,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10243/12318 [17:44:48<3:35:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10243/12318 [17:44:48<3:35:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 45876/154911 [01:19<03:16, 556.07 examples/s]"
      ]
     },
     {
@@ -118380,8 +15647,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10244/12318 [17:44:53<3:35:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10244/12318 [17:44:53<3:35:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 45935/154911 [01:19<03:22, 538.51 examples/s]"
      ]
     },
     {
@@ -118389,8 +15655,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10245/12318 [17:44:57<3:35:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10245/12318 [17:44:57<3:35:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 45990/154911 [01:19<03:22, 538.78 examples/s]"
      ]
     },
     {
@@ -118398,8 +15663,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10246/12318 [17:45:05<3:35:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10246/12318 [17:45:05<3:35:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46075/154911 [01:19<02:55, 621.72 examples/s]"
      ]
     },
     {
@@ -118407,8 +15671,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10247/12318 [17:45:12<3:35:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10247/12318 [17:45:12<3:35:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46138/154911 [01:19<03:08, 577.00 examples/s]"
      ]
     },
     {
@@ -118416,8 +15679,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10248/12318 [17:45:16<3:35:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10248/12318 [17:45:16<3:35:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46201/154911 [01:19<03:09, 572.56 examples/s]"
      ]
     },
     {
@@ -118425,8 +15687,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10249/12318 [17:45:24<3:35:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10249/12318 [17:45:24<3:35:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46285/154911 [01:19<02:50, 636.39 examples/s]"
      ]
     },
     {
@@ -118434,8 +15695,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10250/12318 [17:45:26<3:34:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10250/12318 [17:45:26<3:34:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46360/154911 [01:19<02:43, 663.94 examples/s]"
      ]
     },
     {
@@ -118443,8 +15703,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10251/12318 [17:45:33<3:34:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10251/12318 [17:45:33<3:34:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46429/154911 [01:19<02:42, 666.51 examples/s]"
      ]
     },
     {
@@ -118452,8 +15711,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10252/12318 [17:45:36<3:34:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10252/12318 [17:45:36<3:34:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46497/154911 [01:20<02:51, 631.75 examples/s]"
      ]
     },
     {
@@ -118461,8 +15719,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10253/12318 [17:45:43<3:34:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10253/12318 [17:45:43<3:34:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46576/154911 [01:20<02:40, 675.79 examples/s]"
      ]
     },
     {
@@ -118470,8 +15727,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10254/12318 [17:45:48<3:34:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10254/12318 [17:45:48<3:34:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46645/154911 [01:20<02:42, 667.67 examples/s]"
      ]
     },
     {
@@ -118479,8 +15735,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10255/12318 [17:45:51<3:34:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10255/12318 [17:45:51<3:34:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46715/154911 [01:20<02:40, 675.28 examples/s]"
      ]
     },
     {
@@ -118488,8 +15743,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10256/12318 [17:45:57<3:34:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10256/12318 [17:45:57<3:34:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46783/154911 [01:20<02:44, 656.82 examples/s]"
      ]
     },
     {
@@ -118497,8 +15751,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10257/12318 [17:46:02<3:34:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10257/12318 [17:46:02<3:34:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46850/154911 [01:20<02:46, 649.59 examples/s]"
      ]
     },
     {
@@ -118506,8 +15759,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10258/12318 [17:46:09<3:34:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10258/12318 [17:46:09<3:34:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46919/154911 [01:20<02:49, 638.32 examples/s]"
      ]
     },
     {
@@ -118515,8 +15767,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10259/12318 [17:46:17<3:34:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10259/12318 [17:46:17<3:34:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 46984/154911 [01:20<02:48, 639.01 examples/s]"
      ]
     },
     {
@@ -118524,8 +15775,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10260/12318 [17:46:18<3:33:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10260/12318 [17:46:18<3:33:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 47051/154911 [01:20<02:52, 625.99 examples/s]"
      ]
     },
     {
@@ -118533,8 +15783,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10261/12318 [17:46:21<3:33:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10261/12318 [17:46:21<3:33:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 47119/154911 [01:21<02:48, 640.16 examples/s]"
      ]
     },
     {
@@ -118542,8 +15791,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10262/12318 [17:46:28<3:33:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10262/12318 [17:46:28<3:33:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 47184/154911 [01:21<02:56, 609.38 examples/s]"
      ]
     },
     {
@@ -118551,8 +15799,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10263/12318 [17:46:30<3:33:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10263/12318 [17:46:30<3:33:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  30%|██▋      | 47246/154911 [01:21<02:56, 611.19 examples/s]"
      ]
     },
     {
@@ -118560,8 +15807,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10264/12318 [17:46:33<3:33:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10264/12318 [17:46:33<3:33:26,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▋      | 47333/154911 [01:21<02:37, 683.51 examples/s]"
      ]
     },
     {
@@ -118569,8 +15815,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10265/12318 [17:46:36<3:33:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10265/12318 [17:46:36<3:33:19,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 47403/154911 [01:21<03:21, 533.45 examples/s]"
      ]
     },
     {
@@ -118578,8 +15823,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10266/12318 [17:46:43<3:33:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10266/12318 [17:46:43<3:33:13,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 47531/154911 [01:21<02:30, 713.71 examples/s]"
      ]
     },
     {
@@ -118587,8 +15831,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10267/12318 [17:46:51<3:33:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10267/12318 [17:46:51<3:33:07,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 47612/154911 [01:21<02:36, 686.23 examples/s]"
      ]
     },
     {
@@ -118596,8 +15839,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10268/12318 [17:46:52<3:33:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10268/12318 [17:46:52<3:33:00,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 47688/154911 [01:21<02:36, 684.94 examples/s]"
      ]
     },
     {
@@ -118605,8 +15847,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10269/12318 [17:46:58<3:32:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10269/12318 [17:46:58<3:32:53,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 47762/154911 [01:22<02:41, 663.14 examples/s]"
      ]
     },
     {
@@ -118614,8 +15855,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10270/12318 [17:47:04<3:32:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10270/12318 [17:47:04<3:32:47,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 47835/154911 [01:22<02:37, 680.34 examples/s]"
      ]
     },
     {
@@ -118623,8 +15863,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10271/12318 [17:47:12<3:32:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10271/12318 [17:47:12<3:32:41,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 47907/154911 [01:22<02:37, 680.29 examples/s]"
      ]
     },
     {
@@ -118632,8 +15871,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10272/12318 [17:47:51<3:32:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10272/12318 [17:47:51<3:32:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 47977/154911 [01:22<02:42, 658.44 examples/s]"
      ]
     },
     {
@@ -118641,8 +15879,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10273/12318 [17:47:54<3:32:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10273/12318 [17:47:54<3:32:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48044/154911 [01:22<02:50, 625.78 examples/s]"
      ]
     },
     {
@@ -118650,8 +15887,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10274/12318 [17:48:01<3:32:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10274/12318 [17:48:01<3:32:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48108/154911 [01:22<02:56, 606.78 examples/s]"
      ]
     },
     {
@@ -118659,8 +15895,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10275/12318 [17:48:04<3:32:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10275/12318 [17:48:04<3:32:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48171/154911 [01:22<02:59, 593.03 examples/s]"
      ]
     },
     {
@@ -118668,8 +15903,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10276/12318 [17:48:06<3:32:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10276/12318 [17:48:06<3:32:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48234/154911 [01:22<03:18, 536.95 examples/s]"
      ]
     },
     {
@@ -118677,8 +15911,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10277/12318 [17:48:08<3:32:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10277/12318 [17:48:08<3:32:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48315/154911 [01:22<02:57, 601.87 examples/s]"
      ]
     },
     {
@@ -118686,8 +15919,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10278/12318 [17:48:14<3:32:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10278/12318 [17:48:14<3:32:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48377/154911 [01:23<03:12, 552.94 examples/s]"
      ]
     },
     {
@@ -118695,8 +15927,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10279/12318 [17:48:18<3:31:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10279/12318 [17:48:18<3:31:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48434/154911 [01:23<03:17, 539.64 examples/s]"
      ]
     },
     {
@@ -118704,8 +15935,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10280/12318 [17:48:23<3:31:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10280/12318 [17:48:23<3:31:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48498/154911 [01:23<03:08, 565.49 examples/s]"
      ]
     },
     {
@@ -118713,8 +15943,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10281/12318 [17:48:29<3:31:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10281/12318 [17:48:29<3:31:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48557/154911 [01:23<03:07, 568.00 examples/s]"
      ]
     },
     {
@@ -118722,8 +15951,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10282/12318 [17:48:35<3:31:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10282/12318 [17:48:35<3:31:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48617/154911 [01:23<03:04, 575.80 examples/s]"
      ]
     },
     {
@@ -118731,8 +15959,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10283/12318 [17:48:39<3:31:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10283/12318 [17:48:39<3:31:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48683/154911 [01:23<02:57, 599.03 examples/s]"
      ]
     },
     {
@@ -118740,8 +15967,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10284/12318 [17:48:44<3:31:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10284/12318 [17:48:44<3:31:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  31%|██▊      | 48744/154911 [01:23<03:03, 579.01 examples/s]"
      ]
     },
     {
@@ -118749,8 +15975,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  83%|▊| 10285/12318 [17:48:48<3:31:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  83%|▊| 10285/12318 [17:48:48<3:31:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 48815/154911 [01:23<03:03, 577.32 examples/s]"
      ]
     },
     {
@@ -118758,8 +15983,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10286/12318 [17:48:56<3:31:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10286/12318 [17:48:56<3:31:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 48893/154911 [01:23<02:48, 630.29 examples/s]"
      ]
     },
     {
@@ -118767,8 +15991,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10287/12318 [17:48:59<3:31:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10287/12318 [17:48:59<3:31:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 48957/154911 [01:24<02:49, 624.61 examples/s]"
      ]
     },
     {
@@ -118776,8 +15999,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10288/12318 [17:49:06<3:30:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10288/12318 [17:49:06<3:30:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 49028/154911 [01:24<03:38, 484.22 examples/s]"
      ]
     },
     {
@@ -118785,8 +16007,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10289/12318 [17:49:14<3:30:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10289/12318 [17:49:14<3:30:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 49126/154911 [01:24<03:09, 558.78 examples/s]"
      ]
     },
     {
@@ -118794,8 +16015,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10290/12318 [17:49:19<3:30:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10290/12318 [17:49:19<3:30:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 49188/154911 [01:24<03:04, 572.69 examples/s]"
      ]
     },
     {
@@ -118803,8 +16023,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10291/12318 [17:49:26<3:30:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10291/12318 [17:49:26<3:30:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 49249/154911 [01:24<03:13, 546.73 examples/s]"
      ]
     },
     {
@@ -118812,8 +16031,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10292/12318 [17:49:34<3:30:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10292/12318 [17:49:34<3:30:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 49306/154911 [01:24<03:12, 547.73 examples/s]"
      ]
     },
     {
@@ -118821,8 +16039,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10293/12318 [17:49:37<3:30:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10293/12318 [17:49:37<3:30:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 49364/154911 [01:24<03:22, 521.04 examples/s]"
      ]
     },
     {
@@ -118830,8 +16047,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10294/12318 [17:49:46<3:30:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10294/12318 [17:49:46<3:30:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 49418/154911 [01:24<03:47, 463.97 examples/s]"
      ]
     },
     {
@@ -118839,8 +16055,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10295/12318 [17:49:49<3:30:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10295/12318 [17:49:49<3:30:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▊      | 49467/154911 [01:25<03:53, 452.05 examples/s]"
      ]
     },
     {
@@ -118848,8 +16063,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10296/12318 [17:49:58<3:30:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10296/12318 [17:49:58<3:30:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 49514/154911 [01:25<04:16, 410.90 examples/s]"
      ]
     },
     {
@@ -118857,8 +16071,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10297/12318 [17:50:01<3:30:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10297/12318 [17:50:01<3:30:00,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 49581/154911 [01:25<03:41, 474.48 examples/s]"
      ]
     },
     {
@@ -118866,8 +16079,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10298/12318 [17:50:07<3:29:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10298/12318 [17:50:07<3:29:54,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 49634/154911 [01:25<03:37, 483.99 examples/s]"
      ]
     },
     {
@@ -118875,8 +16087,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10299/12318 [17:50:12<3:29:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10299/12318 [17:50:12<3:29:48,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 49687/154911 [01:25<03:43, 470.73 examples/s]"
      ]
     },
     {
@@ -118884,8 +16095,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10300/12318 [17:50:18<3:29:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10300/12318 [17:50:18<3:29:41,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 49753/154911 [01:25<03:42, 471.63 examples/s]"
      ]
     },
     {
@@ -118893,8 +16103,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10301/12318 [17:50:25<3:29:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10301/12318 [17:50:25<3:29:35,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 49826/154911 [01:25<03:19, 527.53 examples/s]"
      ]
     },
     {
@@ -118902,8 +16111,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10302/12318 [17:50:28<3:29:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10302/12318 [17:50:28<3:29:28,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 49880/154911 [01:25<03:25, 510.56 examples/s]"
      ]
     },
     {
@@ -118911,8 +16119,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10303/12318 [17:50:33<3:29:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10303/12318 [17:50:33<3:29:22,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 49937/154911 [01:26<03:20, 523.58 examples/s]"
      ]
     },
     {
@@ -118920,8 +16127,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10304/12318 [17:51:14<3:29:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10304/12318 [17:51:14<3:29:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 49991/154911 [01:26<03:22, 517.99 examples/s]"
      ]
     },
     {
@@ -118929,8 +16135,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10305/12318 [17:51:19<3:29:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10305/12318 [17:51:19<3:29:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 50063/154911 [01:26<03:03, 571.32 examples/s]"
      ]
     },
     {
@@ -118938,8 +16143,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10306/12318 [17:51:24<3:29:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10306/12318 [17:51:24<3:29:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 50137/154911 [01:26<02:50, 615.73 examples/s]"
      ]
     },
     {
@@ -118947,8 +16151,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10307/12318 [17:51:29<3:29:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10307/12318 [17:51:29<3:29:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 50208/154911 [01:26<02:44, 637.82 examples/s]"
      ]
     },
     {
@@ -118956,8 +16159,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10308/12318 [17:51:37<3:28:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10308/12318 [17:51:37<3:28:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 50273/154911 [01:26<02:47, 625.34 examples/s]"
      ]
     },
     {
@@ -118965,8 +16167,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10309/12318 [17:51:44<3:28:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10309/12318 [17:51:44<3:28:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  32%|██▉      | 50336/154911 [01:26<02:53, 602.29 examples/s]"
      ]
     },
     {
@@ -118974,8 +16175,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10310/12318 [17:51:48<3:28:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10310/12318 [17:51:48<3:28:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 50430/154911 [01:26<02:29, 696.75 examples/s]"
      ]
     },
     {
@@ -118983,8 +16183,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10311/12318 [17:51:52<3:28:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10311/12318 [17:51:52<3:28:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 50510/154911 [01:26<02:26, 710.47 examples/s]"
      ]
     },
     {
@@ -118992,8 +16191,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10312/12318 [17:52:00<3:28:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10312/12318 [17:52:00<3:28:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 50631/154911 [01:26<02:04, 837.65 examples/s]"
      ]
     },
     {
@@ -119001,8 +16199,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10313/12318 [17:52:02<3:28:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10313/12318 [17:52:02<3:28:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 50721/154911 [01:27<02:04, 835.49 examples/s]"
      ]
     },
     {
@@ -119010,8 +16207,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10314/12318 [17:52:09<3:28:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10314/12318 [17:52:09<3:28:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 50805/154911 [01:27<02:10, 795.32 examples/s]"
      ]
     },
     {
@@ -119019,8 +16215,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10315/12318 [17:52:15<3:28:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10315/12318 [17:52:15<3:28:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 50886/154911 [01:27<02:19, 747.35 examples/s]"
      ]
     },
     {
@@ -119028,8 +16223,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10316/12318 [17:52:21<3:28:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10316/12318 [17:52:21<3:28:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 50985/154911 [01:27<02:08, 810.96 examples/s]"
      ]
     },
     {
@@ -119037,8 +16231,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10317/12318 [17:52:28<3:28:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10317/12318 [17:52:28<3:28:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 51068/154911 [01:27<02:37, 658.45 examples/s]"
      ]
     },
     {
@@ -119046,8 +16239,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10318/12318 [17:52:32<3:27:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10318/12318 [17:52:32<3:27:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 51172/154911 [01:27<02:21, 734.52 examples/s]"
      ]
     },
     {
@@ -119055,8 +16247,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10319/12318 [17:52:36<3:27:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10319/12318 [17:52:36<3:27:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 51251/154911 [01:27<02:39, 648.00 examples/s]"
      ]
     },
     {
@@ -119064,8 +16255,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10320/12318 [17:52:45<3:27:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10320/12318 [17:52:45<3:27:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 51321/154911 [01:28<02:49, 609.73 examples/s]"
      ]
     },
     {
@@ -119073,8 +16263,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10321/12318 [17:52:48<3:27:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10321/12318 [17:52:48<3:27:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 51386/154911 [01:28<03:20, 516.16 examples/s]"
      ]
     },
     {
@@ -119082,8 +16271,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10322/12318 [17:52:53<3:27:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10322/12318 [17:52:53<3:27:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 51443/154911 [01:28<03:17, 522.92 examples/s]"
      ]
     },
     {
@@ -119091,8 +16279,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10323/12318 [17:52:59<3:27:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10323/12318 [17:52:59<3:27:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 51499/154911 [01:28<03:18, 520.34 examples/s]"
      ]
     },
     {
@@ -119100,8 +16287,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10324/12318 [17:53:00<3:27:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10324/12318 [17:53:00<3:27:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 51555/154911 [01:28<03:18, 521.66 examples/s]"
      ]
     },
     {
@@ -119109,8 +16295,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10325/12318 [17:53:09<3:27:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10325/12318 [17:53:09<3:27:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|██▉      | 51613/154911 [01:28<03:13, 534.64 examples/s]"
      ]
     },
     {
@@ -119118,8 +16303,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10326/12318 [17:53:12<3:27:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10326/12318 [17:53:12<3:27:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|███      | 51681/154911 [01:28<03:00, 571.81 examples/s]"
      ]
     },
     {
@@ -119127,8 +16311,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10327/12318 [17:53:17<3:26:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10327/12318 [17:53:17<3:26:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|███      | 51740/154911 [01:28<03:06, 552.21 examples/s]"
      ]
     },
     {
@@ -119136,8 +16319,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10328/12318 [17:53:21<3:26:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10328/12318 [17:53:21<3:26:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|███      | 51799/154911 [01:28<03:09, 545.49 examples/s]"
      ]
     },
     {
@@ -119145,8 +16327,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10329/12318 [17:53:26<3:26:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10329/12318 [17:53:26<3:26:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  33%|███      | 51858/154911 [01:29<03:04, 557.58 examples/s]"
      ]
     },
     {
@@ -119154,7 +16335,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10330/12318 [17:53:32<3:26:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 51949/154911 [01:29<02:38, 651.63 examples/s]"
      ]
     },
     {
@@ -119162,7 +16343,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10330/12318 [17:53:32<3:26:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52035/154911 [01:29<02:25, 708.77 examples/s]"
      ]
     },
     {
@@ -119170,8 +16351,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10331/12318 [17:53:33<3:26:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10331/12318 [17:53:33<3:26:28,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52133/154911 [01:29<02:20, 731.12 examples/s]"
      ]
     },
     {
@@ -119179,8 +16359,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10332/12318 [17:53:38<3:26:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10332/12318 [17:53:38<3:26:22,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52233/154911 [01:29<02:08, 799.47 examples/s]"
      ]
     },
     {
@@ -119188,8 +16367,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10333/12318 [17:53:47<3:26:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10333/12318 [17:53:47<3:26:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52314/154911 [01:29<02:14, 764.31 examples/s]"
      ]
     },
     {
@@ -119197,8 +16375,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10334/12318 [17:53:55<3:26:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10334/12318 [17:53:55<3:26:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52394/154911 [01:29<02:18, 738.36 examples/s]"
      ]
     },
     {
@@ -119206,8 +16383,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10335/12318 [17:54:03<3:26:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10335/12318 [17:54:03<3:26:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52469/154911 [01:29<02:30, 679.11 examples/s]"
      ]
     },
     {
@@ -119215,8 +16391,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10336/12318 [17:54:34<3:26:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10336/12318 [17:54:34<3:26:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52539/154911 [01:29<02:40, 636.66 examples/s]"
      ]
     },
     {
@@ -119224,8 +16399,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10337/12318 [17:54:38<3:25:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10337/12318 [17:54:38<3:25:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52604/154911 [01:30<02:52, 591.46 examples/s]"
      ]
     },
     {
@@ -119233,8 +16407,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10338/12318 [17:54:47<3:25:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10338/12318 [17:54:47<3:25:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52667/154911 [01:30<03:03, 558.61 examples/s]"
      ]
     },
     {
@@ -119242,8 +16415,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10339/12318 [17:54:54<3:25:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10339/12318 [17:54:54<3:25:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52724/154911 [01:30<03:08, 541.92 examples/s]"
      ]
     },
     {
@@ -119251,8 +16423,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10340/12318 [17:55:01<3:25:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10340/12318 [17:55:01<3:25:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52787/154911 [01:30<03:01, 563.01 examples/s]"
      ]
     },
     {
@@ -119260,8 +16431,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10341/12318 [17:55:06<3:25:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10341/12318 [17:55:06<3:25:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52844/154911 [01:30<03:13, 527.01 examples/s]"
      ]
     },
     {
@@ -119269,8 +16439,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10342/12318 [17:55:10<3:25:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10342/12318 [17:55:10<3:25:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52898/154911 [01:30<03:15, 522.47 examples/s]"
      ]
     },
     {
@@ -119278,8 +16447,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10343/12318 [17:55:13<3:25:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10343/12318 [17:55:13<3:25:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 52951/154911 [01:30<03:20, 508.75 examples/s]"
      ]
     },
     {
@@ -119287,8 +16455,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10344/12318 [17:55:17<3:25:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10344/12318 [17:55:17<3:25:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 53003/154911 [01:30<03:33, 476.65 examples/s]"
      ]
     },
     {
@@ -119296,8 +16463,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10345/12318 [17:55:20<3:25:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10345/12318 [17:55:20<3:25:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 53060/154911 [01:31<03:24, 497.85 examples/s]"
      ]
     },
     {
@@ -119305,8 +16471,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10346/12318 [17:55:25<3:24:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10346/12318 [17:55:25<3:24:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 53112/154911 [01:31<03:26, 493.36 examples/s]"
      ]
     },
     {
@@ -119314,8 +16479,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10347/12318 [17:55:29<3:24:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10347/12318 [17:55:29<3:24:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 53191/154911 [01:31<02:57, 572.54 examples/s]"
      ]
     },
     {
@@ -119323,7 +16487,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10348/12318 [17:55:35<3:24:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 53250/154911 [01:31<03:08, 538.84 examples/s]"
      ]
     },
     {
@@ -119331,7 +16495,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10348/12318 [17:55:35<3:24:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 53315/154911 [01:31<02:59, 564.77 examples/s]"
      ]
     },
     {
@@ -119339,8 +16503,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10349/12318 [17:55:44<3:24:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10349/12318 [17:55:44<3:24:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 53383/154911 [01:31<02:51, 593.37 examples/s]"
      ]
     },
     {
@@ -119348,8 +16511,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10350/12318 [17:55:52<3:24:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10350/12318 [17:55:52<3:24:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  34%|███      | 53444/154911 [01:31<02:55, 577.20 examples/s]"
      ]
     },
     {
@@ -119357,8 +16519,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10351/12318 [17:55:55<3:24:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10351/12318 [17:55:55<3:24:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███      | 53507/154911 [01:31<02:54, 580.36 examples/s]"
      ]
     },
     {
@@ -119366,8 +16527,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10352/12318 [17:56:02<3:24:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10352/12318 [17:56:02<3:24:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███      | 53572/154911 [01:31<02:56, 573.53 examples/s]"
      ]
     },
     {
@@ -119375,8 +16535,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10353/12318 [17:56:11<3:24:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10353/12318 [17:56:11<3:24:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███      | 53643/154911 [01:32<02:46, 609.21 examples/s]"
      ]
     },
     {
@@ -119384,8 +16543,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10354/12318 [17:56:14<3:24:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10354/12318 [17:56:14<3:24:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███      | 53706/154911 [01:32<02:44, 614.50 examples/s]"
      ]
     },
     {
@@ -119393,8 +16551,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10355/12318 [17:56:18<3:24:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10355/12318 [17:56:18<3:24:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███      | 53768/154911 [01:32<03:00, 560.11 examples/s]"
      ]
     },
     {
@@ -119402,8 +16559,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10356/12318 [17:56:20<3:23:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10356/12318 [17:56:20<3:23:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 53826/154911 [01:32<03:30, 480.80 examples/s]"
      ]
     },
     {
@@ -119411,7 +16567,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10357/12318 [17:56:28<3:23:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 53877/154911 [01:32<03:48, 442.46 examples/s]"
      ]
     },
     {
@@ -119419,7 +16575,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10357/12318 [17:56:28<3:23:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 53930/154911 [01:32<03:38, 461.53 examples/s]"
      ]
     },
     {
@@ -119427,8 +16583,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10358/12318 [17:56:37<3:23:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10358/12318 [17:56:37<3:23:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 53991/154911 [01:32<03:24, 493.91 examples/s]"
      ]
     },
     {
@@ -119436,8 +16591,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10359/12318 [17:56:45<3:23:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10359/12318 [17:56:45<3:23:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54067/154911 [01:32<02:58, 563.53 examples/s]"
      ]
     },
     {
@@ -119445,8 +16599,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10360/12318 [17:56:49<3:23:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10360/12318 [17:56:49<3:23:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54126/154911 [01:32<03:02, 551.78 examples/s]"
      ]
     },
     {
@@ -119454,8 +16607,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10361/12318 [17:56:58<3:23:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10361/12318 [17:56:58<3:23:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54183/154911 [01:33<03:02, 552.79 examples/s]"
      ]
     },
     {
@@ -119463,8 +16615,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10362/12318 [17:57:04<3:23:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10362/12318 [17:57:04<3:23:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54241/154911 [01:33<04:02, 415.81 examples/s]"
      ]
     },
     {
@@ -119472,8 +16623,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10363/12318 [17:57:09<3:23:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10363/12318 [17:57:09<3:23:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54373/154911 [01:33<02:41, 622.81 examples/s]"
      ]
     },
     {
@@ -119481,8 +16631,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10364/12318 [17:57:14<3:23:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10364/12318 [17:57:14<3:23:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54446/154911 [01:33<03:01, 552.51 examples/s]"
      ]
     },
     {
@@ -119490,8 +16639,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10365/12318 [17:57:22<3:23:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10365/12318 [17:57:22<3:23:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54510/154911 [01:33<03:03, 545.89 examples/s]"
      ]
     },
     {
@@ -119499,8 +16647,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10366/12318 [17:57:23<3:22:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10366/12318 [17:57:23<3:22:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54571/154911 [01:33<03:14, 515.37 examples/s]"
      ]
     },
     {
@@ -119508,8 +16655,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|���| 10367/12318 [17:57:30<3:22:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10367/12318 [17:57:30<3:22:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54627/154911 [01:33<03:28, 481.75 examples/s]"
      ]
     },
     {
@@ -119517,7 +16663,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10368/12318 [17:57:55<3:22:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54679/154911 [01:34<03:28, 480.92 examples/s]"
      ]
     },
     {
@@ -119525,7 +16671,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10368/12318 [17:57:55<3:22:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54730/154911 [01:34<03:31, 473.96 examples/s]"
      ]
     },
     {
@@ -119533,8 +16679,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10369/12318 [17:58:04<3:22:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10369/12318 [17:58:04<3:22:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54779/154911 [01:34<03:38, 458.16 examples/s]"
      ]
     },
     {
@@ -119542,8 +16687,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10370/12318 [17:58:09<3:22:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10370/12318 [17:58:09<3:22:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54839/154911 [01:34<03:28, 479.21 examples/s]"
      ]
     },
     {
@@ -119551,8 +16695,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10371/12318 [17:58:13<3:22:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10371/12318 [17:58:13<3:22:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54892/154911 [01:34<03:29, 477.49 examples/s]"
      ]
     },
     {
@@ -119560,8 +16703,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10372/12318 [17:58:17<3:22:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10372/12318 [17:58:17<3:22:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  35%|███▏     | 54967/154911 [01:34<03:07, 533.46 examples/s]"
      ]
     },
     {
@@ -119569,8 +16711,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10373/12318 [17:58:19<3:22:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10373/12318 [17:58:19<3:22:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55032/154911 [01:34<02:57, 563.40 examples/s]"
      ]
     },
     {
@@ -119578,7 +16719,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10374/12318 [17:58:22<3:22:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55089/154911 [01:34<02:58, 560.60 examples/s]"
      ]
     },
     {
@@ -119586,7 +16727,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10374/12318 [17:58:22<3:22:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55149/154911 [01:34<02:55, 568.89 examples/s]"
      ]
     },
     {
@@ -119594,7 +16735,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10375/12318 [17:58:26<3:21:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55217/154911 [01:35<02:46, 599.16 examples/s]"
      ]
     },
     {
@@ -119602,7 +16743,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10375/12318 [17:58:26<3:21:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55294/154911 [01:35<02:35, 641.79 examples/s]"
      ]
     },
     {
@@ -119610,8 +16751,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10376/12318 [17:58:31<3:21:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10376/12318 [17:58:31<3:21:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55368/154911 [01:35<02:31, 655.99 examples/s]"
      ]
     },
     {
@@ -119619,8 +16759,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10377/12318 [17:58:33<3:21:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10377/12318 [17:58:33<3:21:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55463/154911 [01:35<02:17, 724.84 examples/s]"
      ]
     },
     {
@@ -119628,8 +16767,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10378/12318 [17:58:37<3:21:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10378/12318 [17:58:37<3:21:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55549/154911 [01:35<02:10, 763.48 examples/s]"
      ]
     },
     {
@@ -119637,8 +16775,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10379/12318 [17:58:45<3:21:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10379/12318 [17:58:45<3:21:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55628/154911 [01:35<02:13, 743.76 examples/s]"
      ]
     },
     {
@@ -119646,8 +16783,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10380/12318 [17:58:53<3:21:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10380/12318 [17:58:53<3:21:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55703/154911 [01:35<02:16, 725.53 examples/s]"
      ]
     },
     {
@@ -119655,8 +16791,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10381/12318 [17:58:57<3:21:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10381/12318 [17:58:57<3:21:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55776/154911 [01:35<02:21, 702.60 examples/s]"
      ]
     },
     {
@@ -119664,8 +16799,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10382/12318 [17:59:01<3:21:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10382/12318 [17:59:01<3:21:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55848/154911 [01:35<02:20, 704.64 examples/s]"
      ]
     },
     {
@@ -119673,8 +16807,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10383/12318 [17:59:08<3:21:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10383/12318 [17:59:08<3:21:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▏     | 55919/154911 [01:36<02:34, 639.79 examples/s]"
      ]
     },
     {
@@ -119682,8 +16815,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10384/12318 [17:59:13<3:21:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10384/12318 [17:59:13<3:21:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▎     | 55992/154911 [01:36<02:30, 658.89 examples/s]"
      ]
     },
     {
@@ -119691,8 +16823,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10385/12318 [17:59:14<3:20:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10385/12318 [17:59:14<3:20:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▎     | 56059/154911 [01:36<02:32, 648.87 examples/s]"
      ]
     },
     {
@@ -119700,8 +16831,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10386/12318 [17:59:16<3:20:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10386/12318 [17:59:16<3:20:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▎     | 56130/154911 [01:36<02:29, 659.09 examples/s]"
      ]
     },
     {
@@ -119709,8 +16839,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10387/12318 [17:59:25<3:20:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10387/12318 [17:59:25<3:20:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▎     | 56204/154911 [01:36<02:26, 675.20 examples/s]"
      ]
     },
     {
@@ -119718,141 +16847,18 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10388/12318 [17:59:31<3:20:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10388/12318 [17:59:31<3:20:33,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10389/12318 [17:59:39<3:20:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10389/12318 [17:59:39<3:20:28,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10390/12318 [17:59:47<3:20:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10390/12318 [17:59:47<3:20:22,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10391/12318 [17:59:51<3:20:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10391/12318 [17:59:51<3:20:15,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10392/12318 [17:59:57<3:20:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10392/12318 [17:59:57<3:20:09,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10393/12318 [18:00:00<3:20:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10393/12318 [18:00:00<3:20:02,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10394/12318 [18:00:05<3:19:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10394/12318 [18:00:05<3:19:55,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10395/12318 [18:00:10<3:19:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10395/12318 [18:00:10<3:19:49,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10396/12318 [18:00:19<3:19:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10396/12318 [18:00:19<3:19:43,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10397/12318 [18:00:20<3:19:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10397/12318 [18:00:20<3:19:36,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10398/12318 [18:00:28<3:19:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10398/12318 [18:00:28<3:19:30,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10399/12318 [18:00:36<3:19:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10399/12318 [18:00:36<3:19:24,  6.23s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10400/12318 [18:01:08<3:19:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10400/12318 [18:01:08<3:19:23,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10401/12318 [18:01:35<3:19:20,  6.24s/it, v_num=e4xv, train/los"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\r",
-      "Epoch 0:  84%|▊| 10401/12318 [18:01:35<3:19:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▎     | 56272/154911 [01:36<02:39, 617.35 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "[rank: 7] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 7, MEMBER: 8/8\r\n",
+      "[2023-09-02 06:20:40,878] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n",
       "\r",
-      "Epoch 0:  84%|▊| 10402/12318 [18:01:41<3:19:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10402/12318 [18:01:41<3:19:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▎     | 56381/154911 [01:36<02:12, 745.53 examples/s]"
      ]
     },
     {
@@ -119860,8 +16866,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10403/12318 [18:01:48<3:19:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10403/12318 [18:01:48<3:19:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  36%|███▎     | 56460/154911 [01:36<02:09, 757.51 examples/s]"
      ]
     },
     {
@@ -119869,8 +16874,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10404/12318 [18:01:50<3:19:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10404/12318 [18:01:50<3:19:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|███▎     | 56560/154911 [01:36<01:59, 826.32 examples/s]"
      ]
     },
     {
@@ -119878,8 +16882,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10405/12318 [18:01:51<3:18:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10405/12318 [18:01:51<3:18:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|███▎     | 56685/154911 [01:36<01:44, 943.86 examples/s]"
      ]
     },
     {
@@ -119887,8 +16890,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10406/12318 [18:01:58<3:18:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10406/12318 [18:01:58<3:18:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|███▎     | 56793/154911 [01:37<01:40, 980.57 examples/s]"
      ]
     },
     {
@@ -119896,8 +16898,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10407/12318 [18:02:05<3:18:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10407/12318 [18:02:05<3:18:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 56909/154911 [01:37<01:35, 1031.12 examples/s]"
      ]
     },
     {
@@ -119905,8 +16906,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  84%|▊| 10408/12318 [18:02:08<3:18:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  84%|▊| 10408/12318 [18:02:08<3:18:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 57030/154911 [01:37<01:30, 1076.64 examples/s]"
      ]
     },
     {
@@ -119914,8 +16914,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10409/12318 [18:02:10<3:18:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10409/12318 [18:02:10<3:18:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 57141/154911 [01:37<01:31, 1073.11 examples/s]"
      ]
     },
     {
@@ -119923,17 +16922,16 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10410/12318 [18:02:15<3:18:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10410/12318 [18:02:15<3:18:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 57266/154911 [01:37<01:26, 1122.73 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  85%|▊| 10411/12318 [18:02:19<3:18:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10411/12318 [18:02:19<3:18:15,  6.24s/it, v_num=e4xv, train/los"
+      "[rank: 4] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 4, MEMBER: 5/8\r\n",
+      "[2023-09-02 06:20:41,745] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
      ]
     },
     {
@@ -119941,8 +16939,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10412/12318 [18:02:23<3:18:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10412/12318 [18:02:23<3:18:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 57379/154911 [01:37<01:32, 1050.80 examples/s]"
      ]
     },
     {
@@ -119950,8 +16947,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10413/12318 [18:02:31<3:18:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10413/12318 [18:02:31<3:18:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 57486/154911 [01:37<01:35, 1020.68 examples/s]"
      ]
     },
     {
@@ -119959,17 +16955,16 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10414/12318 [18:02:35<3:17:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10414/12318 [18:02:35<3:17:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 57589/154911 [01:37<01:35, 1015.55 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  85%|▊| 10415/12318 [18:02:40<3:17:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10415/12318 [18:02:40<3:17:49,  6.24s/it, v_num=e4xv, train/los"
+      "[rank: 3] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 3, MEMBER: 4/8\r\n",
+      "[2023-09-02 06:20:42,075] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
      ]
     },
     {
@@ -119977,8 +16972,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10416/12318 [18:02:45<3:17:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10416/12318 [18:02:45<3:17:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|███▎     | 57692/154911 [01:37<01:38, 983.80 examples/s]"
      ]
     },
     {
@@ -119986,7 +16980,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10417/12318 [18:02:52<3:17:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 57855/154911 [01:38<01:23, 1165.69 examples/s]"
      ]
     },
     {
@@ -119994,7 +16988,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10417/12318 [18:02:52<3:17:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 57974/154911 [01:38<01:32, 1047.34 examples/s]"
      ]
     },
     {
@@ -120002,8 +16996,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10418/12318 [18:03:01<3:17:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10418/12318 [18:03:01<3:17:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  37%|██▉     | 58083/154911 [01:38<01:33, 1033.62 examples/s]"
      ]
     },
     {
@@ -120011,8 +17004,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10419/12318 [18:03:04<3:17:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10419/12318 [18:03:04<3:17:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███     | 58194/154911 [01:38<01:32, 1049.49 examples/s]"
      ]
     },
     {
@@ -120020,8 +17012,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10420/12318 [18:03:11<3:17:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10420/12318 [18:03:11<3:17:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███     | 58301/154911 [01:38<01:31, 1052.53 examples/s]"
      ]
     },
     {
@@ -120029,8 +17020,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10421/12318 [18:03:20<3:17:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10421/12318 [18:03:20<3:17:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███     | 58408/154911 [01:38<01:34, 1016.98 examples/s]"
      ]
     },
     {
@@ -120038,8 +17028,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10422/12318 [18:03:25<3:17:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10422/12318 [18:03:25<3:17:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███     | 58550/154911 [01:38<01:25, 1128.77 examples/s]"
      ]
     },
     {
@@ -120047,8 +17036,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10423/12318 [18:03:28<3:16:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10423/12318 [18:03:28<3:16:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███     | 58673/154911 [01:38<01:23, 1157.22 examples/s]"
      ]
     },
     {
@@ -120056,8 +17044,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10424/12318 [18:03:35<3:16:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10424/12318 [18:03:35<3:16:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███     | 58790/154911 [01:38<01:25, 1121.92 examples/s]"
      ]
     },
     {
@@ -120065,8 +17052,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10425/12318 [18:03:36<3:16:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10425/12318 [18:03:36<3:16:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███     | 58904/154911 [01:39<01:26, 1111.68 examples/s]"
      ]
     },
     {
@@ -120074,8 +17060,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10426/12318 [18:03:42<3:16:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10426/12318 [18:03:42<3:16:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███     | 59032/154911 [01:39<01:23, 1149.33 examples/s]"
      ]
     },
     {
@@ -120083,8 +17068,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10427/12318 [18:03:51<3:16:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10427/12318 [18:03:51<3:16:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███     | 59152/154911 [01:39<01:31, 1043.07 examples/s]"
      ]
     },
     {
@@ -120092,8 +17076,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10428/12318 [18:03:58<3:16:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10428/12318 [18:03:58<3:16:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███▍     | 59260/154911 [01:39<01:39, 958.04 examples/s]"
      ]
     },
     {
@@ -120101,8 +17084,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10429/12318 [18:04:05<3:16:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10429/12318 [18:04:05<3:16:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███▍     | 59360/154911 [01:39<01:38, 965.91 examples/s]"
      ]
     },
     {
@@ -120110,8 +17092,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10430/12318 [18:04:07<3:16:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10430/12318 [18:04:07<3:16:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███▍     | 59459/154911 [01:39<01:45, 904.76 examples/s]"
      ]
     },
     {
@@ -120119,8 +17100,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10431/12318 [18:04:12<3:16:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10431/12318 [18:04:12<3:16:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  38%|███▍     | 59557/154911 [01:39<01:43, 923.02 examples/s]"
      ]
     },
     {
@@ -120128,8 +17108,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10432/12318 [18:04:47<3:16:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10432/12318 [18:04:47<3:16:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▍     | 59651/154911 [01:39<01:51, 852.58 examples/s]"
      ]
     },
     {
@@ -120137,8 +17116,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10433/12318 [18:04:56<3:16:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10433/12318 [18:04:56<3:16:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▍     | 59738/154911 [01:40<02:19, 681.38 examples/s]"
      ]
     },
     {
@@ -120146,8 +17124,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10434/12318 [18:05:03<3:15:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10434/12318 [18:05:03<3:15:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▍     | 59883/154911 [01:40<01:50, 858.40 examples/s]"
      ]
     },
     {
@@ -120155,8 +17132,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10435/12318 [18:05:10<3:15:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10435/12318 [18:05:10<3:15:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▍     | 59979/154911 [01:40<01:59, 797.60 examples/s]"
      ]
     },
     {
@@ -120164,8 +17140,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10436/12318 [18:05:16<3:15:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10436/12318 [18:05:16<3:15:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▍     | 60095/154911 [01:40<01:47, 885.27 examples/s]"
      ]
     },
     {
@@ -120173,8 +17148,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10437/12318 [18:05:22<3:15:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10437/12318 [18:05:22<3:15:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▍     | 60191/154911 [01:40<02:12, 712.70 examples/s]"
      ]
     },
     {
@@ -120182,8 +17156,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10438/12318 [18:05:30<3:15:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10438/12318 [18:05:30<3:15:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60275/154911 [01:40<02:07, 740.36 examples/s]"
      ]
     },
     {
@@ -120191,8 +17164,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10439/12318 [18:05:37<3:15:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10439/12318 [18:05:37<3:15:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60357/154911 [01:40<02:19, 675.56 examples/s]"
      ]
     },
     {
@@ -120200,8 +17172,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10440/12318 [18:05:39<3:15:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10440/12318 [18:05:39<3:15:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60431/154911 [01:40<02:20, 672.76 examples/s]"
      ]
     },
     {
@@ -120209,8 +17180,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10441/12318 [18:05:48<3:15:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10441/12318 [18:05:48<3:15:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60504/154911 [01:41<02:20, 673.52 examples/s]"
      ]
     },
     {
@@ -120218,8 +17188,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10442/12318 [18:05:55<3:15:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10442/12318 [18:05:55<3:15:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60588/154911 [01:41<02:12, 711.29 examples/s]"
      ]
     },
     {
@@ -120227,8 +17196,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10443/12318 [18:06:02<3:14:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10443/12318 [18:06:02<3:14:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60681/154911 [01:41<02:03, 760.87 examples/s]"
      ]
     },
     {
@@ -120236,8 +17204,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10444/12318 [18:06:08<3:14:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10444/12318 [18:06:08<3:14:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60760/154911 [01:41<02:10, 722.39 examples/s]"
      ]
     },
     {
@@ -120245,8 +17212,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10445/12318 [18:06:16<3:14:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10445/12318 [18:06:16<3:14:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60835/154911 [01:41<02:22, 660.79 examples/s]"
      ]
     },
     {
@@ -120254,8 +17220,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10446/12318 [18:06:22<3:14:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10446/12318 [18:06:22<3:14:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60904/154911 [01:41<02:26, 642.47 examples/s]"
      ]
     },
     {
@@ -120263,8 +17228,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10447/12318 [18:06:25<3:14:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10447/12318 [18:06:25<3:14:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 60979/154911 [01:41<02:20, 667.56 examples/s]"
      ]
     },
     {
@@ -120272,8 +17236,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10448/12318 [18:06:31<3:14:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10448/12318 [18:06:31<3:14:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 61048/154911 [01:41<02:21, 663.23 examples/s]"
      ]
     },
     {
@@ -120281,8 +17244,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10449/12318 [18:06:34<3:14:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10449/12318 [18:06:34<3:14:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  39%|███▌     | 61127/154911 [01:41<02:15, 694.45 examples/s]"
      ]
     },
     {
@@ -120290,8 +17252,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10450/12318 [18:06:42<3:14:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10450/12318 [18:06:42<3:14:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61227/154911 [01:42<02:00, 779.38 examples/s]"
      ]
     },
     {
@@ -120299,8 +17260,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10451/12318 [18:06:48<3:14:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10451/12318 [18:06:48<3:14:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61308/154911 [01:42<02:16, 687.22 examples/s]"
      ]
     },
     {
@@ -120308,8 +17268,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10452/12318 [18:06:50<3:14:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10452/12318 [18:06:50<3:14:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61417/154911 [01:42<01:58, 789.80 examples/s]"
      ]
     },
     {
@@ -120317,8 +17276,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10453/12318 [18:06:55<3:13:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10453/12318 [18:06:55<3:13:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61500/154911 [01:42<01:59, 783.48 examples/s]"
      ]
     },
     {
@@ -120326,8 +17284,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10454/12318 [18:07:03<3:13:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10454/12318 [18:07:03<3:13:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61581/154911 [01:42<02:02, 760.36 examples/s]"
      ]
     },
     {
@@ -120335,8 +17292,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10455/12318 [18:07:12<3:13:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10455/12318 [18:07:12<3:13:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61659/154911 [01:42<02:05, 744.20 examples/s]"
      ]
     },
     {
@@ -120344,8 +17300,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10456/12318 [18:07:18<3:13:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10456/12318 [18:07:18<3:13:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61735/154911 [01:42<02:19, 669.23 examples/s]"
      ]
     },
     {
@@ -120353,8 +17308,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10457/12318 [18:07:24<3:13:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10457/12318 [18:07:24<3:13:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61804/154911 [01:42<02:25, 640.31 examples/s]"
      ]
     },
     {
@@ -120362,8 +17316,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10458/12318 [18:07:27<3:13:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10458/12318 [18:07:27<3:13:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61870/154911 [01:43<02:26, 632.94 examples/s]"
      ]
     },
     {
@@ -120371,8 +17324,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10459/12318 [18:07:31<3:13:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10459/12318 [18:07:31<3:13:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61938/154911 [01:43<03:10, 487.19 examples/s]"
      ]
     },
     {
@@ -120380,8 +17332,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10460/12318 [18:07:39<3:13:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10460/12318 [18:07:39<3:13:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 61994/154911 [01:43<03:08, 491.66 examples/s]"
      ]
     },
     {
@@ -120389,8 +17340,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10461/12318 [18:07:44<3:13:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10461/12318 [18:07:44<3:13:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 62091/154911 [01:43<02:33, 602.79 examples/s]"
      ]
     },
     {
@@ -120398,8 +17348,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10462/12318 [18:07:47<3:12:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10462/12318 [18:07:47<3:12:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 62160/154911 [01:43<02:30, 616.50 examples/s]"
      ]
     },
     {
@@ -120407,8 +17356,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10463/12318 [18:07:52<3:12:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10463/12318 [18:07:52<3:12:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 62226/154911 [01:43<02:31, 610.79 examples/s]"
      ]
     },
     {
@@ -120416,8 +17364,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10464/12318 [18:08:09<3:12:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10464/12318 [18:08:09<3:12:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 62290/154911 [01:43<02:33, 601.57 examples/s]"
      ]
     },
     {
@@ -120425,8 +17372,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10465/12318 [18:08:14<3:12:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10465/12318 [18:08:14<3:12:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▌     | 62357/154911 [01:43<02:29, 619.82 examples/s]"
      ]
     },
     {
@@ -120434,8 +17380,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10466/12318 [18:08:16<3:12:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10466/12318 [18:08:16<3:12:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▋     | 62432/154911 [01:44<02:20, 655.92 examples/s]"
      ]
     },
     {
@@ -120443,8 +17388,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10467/12318 [18:08:19<3:12:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10467/12318 [18:08:19<3:12:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▋     | 62508/154911 [01:44<02:15, 679.80 examples/s]"
      ]
     },
     {
@@ -120452,8 +17396,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10468/12318 [18:08:26<3:12:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10468/12318 [18:08:26<3:12:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▋     | 62577/154911 [01:44<02:15, 681.87 examples/s]"
      ]
     },
     {
@@ -120461,8 +17404,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10469/12318 [18:08:29<3:12:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10469/12318 [18:08:29<3:12:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  40%|███▋     | 62649/154911 [01:44<02:24, 636.80 examples/s]"
      ]
     },
     {
@@ -120470,8 +17412,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10470/12318 [18:08:30<3:12:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10470/12318 [18:08:30<3:12:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▋     | 62746/154911 [01:44<02:07, 725.09 examples/s]"
      ]
     },
     {
@@ -120479,8 +17420,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10471/12318 [18:08:34<3:12:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10471/12318 [18:08:34<3:12:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▋     | 62820/154911 [01:44<02:13, 688.36 examples/s]"
      ]
     },
     {
@@ -120488,8 +17428,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10472/12318 [18:08:43<3:11:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10472/12318 [18:08:43<3:11:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▋     | 62915/154911 [01:44<02:01, 754.59 examples/s]"
      ]
     },
     {
@@ -120497,8 +17436,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10473/12318 [18:08:48<3:11:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10473/12318 [18:08:48<3:11:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▋     | 63020/154911 [01:44<01:51, 826.60 examples/s]"
      ]
     },
     {
@@ -120506,8 +17444,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10474/12318 [18:08:55<3:11:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10474/12318 [18:08:55<3:11:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▋     | 63127/154911 [01:44<01:43, 889.18 examples/s]"
      ]
     },
     {
@@ -120515,8 +17452,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10475/12318 [18:08:56<3:11:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10475/12318 [18:08:56<3:11:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▋     | 63219/154911 [01:44<01:42, 897.80 examples/s]"
      ]
     },
     {
@@ -120524,8 +17460,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10476/12318 [18:09:00<3:11:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10476/12318 [18:09:00<3:11:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▋     | 63324/154911 [01:45<01:37, 939.94 examples/s]"
      ]
     },
     {
@@ -120533,8 +17468,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10477/12318 [18:09:06<3:11:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10477/12318 [18:09:06<3:11:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▋     | 63437/154911 [01:45<01:33, 981.59 examples/s]"
      ]
     },
     {
@@ -120542,8 +17476,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10478/12318 [18:09:11<3:11:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10478/12318 [18:09:11<3:11:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▎    | 63610/154911 [01:45<01:16, 1190.32 examples/s]"
      ]
     },
     {
@@ -120551,8 +17484,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10479/12318 [18:09:17<3:11:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10479/12318 [18:09:17<3:11:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▎    | 63730/154911 [01:45<01:17, 1180.13 examples/s]"
      ]
     },
     {
@@ -120560,8 +17492,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10480/12318 [18:09:23<3:11:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10480/12318 [18:09:23<3:11:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▎    | 63861/154911 [01:45<01:15, 1204.14 examples/s]"
      ]
     },
     {
@@ -120569,8 +17500,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10481/12318 [18:09:27<3:10:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10481/12318 [18:09:27<3:10:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▎    | 64001/154911 [01:45<01:12, 1259.21 examples/s]"
      ]
     },
     {
@@ -120578,17 +17508,18 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10482/12318 [18:09:29<3:10:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10482/12318 [18:09:29<3:10:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▎    | 64128/154911 [01:45<01:17, 1176.71 examples/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "[rank: 5] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 5, MEMBER: 6/8\r\n",
+      "[2023-09-02 06:20:50,033] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n",
       "\r",
-      "Epoch 0:  85%|▊| 10483/12318 [18:09:34<3:10:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10483/12318 [18:09:34<3:10:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  41%|███▎    | 64247/154911 [01:45<01:23, 1092.29 examples/s]"
      ]
     },
     {
@@ -120596,8 +17527,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10484/12318 [18:09:38<3:10:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10484/12318 [18:09:38<3:10:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▎    | 64359/154911 [01:45<01:28, 1028.75 examples/s]"
      ]
     },
     {
@@ -120605,8 +17535,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10485/12318 [18:09:44<3:10:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10485/12318 [18:09:44<3:10:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▎    | 64465/154911 [01:46<01:30, 1002.96 examples/s]"
      ]
     },
     {
@@ -120614,8 +17543,9 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10486/12318 [18:09:48<3:10:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10486/12318 [18:09:48<3:10:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▎    | 64570/154911 [01:46<01:28, 1015.20 examples/s][rank: 1] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 1, MEMBER: 2/8\r\n",
+      "[2023-09-02 06:20:50,377] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
      ]
     },
     {
@@ -120623,8 +17553,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10487/12318 [18:09:50<3:10:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10487/12318 [18:09:50<3:10:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▎    | 64699/154911 [01:46<01:22, 1091.38 examples/s]"
      ]
     },
     {
@@ -120632,8 +17561,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10488/12318 [18:09:56<3:10:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10488/12318 [18:09:56<3:10:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▎    | 64842/154911 [01:46<01:15, 1186.29 examples/s]"
      ]
     },
     {
@@ -120641,8 +17569,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10489/12318 [18:09:59<3:10:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10489/12318 [18:09:59<3:10:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▎    | 64964/154911 [01:46<01:15, 1192.87 examples/s]"
      ]
     },
     {
@@ -120650,8 +17577,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10490/12318 [18:10:03<3:09:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10490/12318 [18:10:03<3:09:57,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▎    | 65087/154911 [01:46<01:17, 1165.60 examples/s]"
      ]
     },
     {
@@ -120659,8 +17585,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10491/12318 [18:10:10<3:09:51,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10491/12318 [18:10:10<3:09:51,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▎    | 65205/154911 [01:46<01:17, 1157.33 examples/s]"
      ]
     },
     {
@@ -120668,8 +17593,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10492/12318 [18:10:16<3:09:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10492/12318 [18:10:16<3:09:44,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▎    | 65334/154911 [01:46<01:15, 1193.61 examples/s]"
      ]
     },
     {
@@ -120677,8 +17601,9 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10493/12318 [18:10:21<3:09:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10493/12318 [18:10:21<3:09:38,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▍    | 65458/154911 [01:46<01:14, 1206.44 examples/s][rank: 2] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 2, MEMBER: 3/8\r\n",
+      "[2023-09-02 06:20:51,124] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
      ]
     },
     {
@@ -120686,8 +17611,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10494/12318 [18:10:29<3:09:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10494/12318 [18:10:29<3:09:32,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▍    | 65594/154911 [01:47<01:12, 1239.45 examples/s]"
      ]
     },
     {
@@ -120695,8 +17619,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10495/12318 [18:10:36<3:09:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10495/12318 [18:10:36<3:09:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  42%|███▍    | 65720/154911 [01:47<01:21, 1090.59 examples/s]"
      ]
     },
     {
@@ -120704,8 +17627,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10496/12318 [18:11:33<3:09:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10496/12318 [18:11:33<3:09:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  43%|███▍    | 65916/154911 [01:47<01:07, 1323.21 examples/s]"
      ]
     },
     {
@@ -120713,8 +17635,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10497/12318 [18:11:34<3:09:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10497/12318 [18:11:34<3:09:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  43%|███▍    | 66064/154911 [01:47<01:05, 1365.18 examples/s]"
      ]
     },
     {
@@ -120722,8 +17643,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10498/12318 [18:11:40<3:09:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10498/12318 [18:11:40<3:09:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  43%|███▍    | 66241/154911 [01:47<00:59, 1478.44 examples/s]"
      ]
     },
     {
@@ -120731,8 +17651,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10499/12318 [18:11:46<3:09:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10499/12318 [18:11:46<3:09:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  43%|███▍    | 66451/154911 [01:47<00:53, 1657.11 examples/s]"
      ]
     },
     {
@@ -120740,8 +17659,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10500/12318 [18:11:50<3:09:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10500/12318 [18:11:50<3:09:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  43%|███▍    | 66675/154911 [01:47<00:48, 1822.05 examples/s]"
      ]
     },
     {
@@ -120749,8 +17667,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10501/12318 [18:11:55<3:08:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10501/12318 [18:11:55<3:08:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  43%|███▍    | 66876/154911 [01:47<00:46, 1875.74 examples/s]"
      ]
     },
     {
@@ -120758,8 +17675,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10502/12318 [18:12:01<3:08:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10502/12318 [18:12:01<3:08:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  43%|███▍    | 67068/154911 [01:47<00:47, 1860.92 examples/s]"
      ]
     },
     {
@@ -120767,8 +17683,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10503/12318 [18:12:09<3:08:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10503/12318 [18:12:09<3:08:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  43%|███▍    | 67256/154911 [01:47<00:48, 1822.61 examples/s]"
      ]
     },
     {
@@ -120776,8 +17691,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10504/12318 [18:12:13<3:08:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10504/12318 [18:12:13<3:08:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  44%|███▍    | 67452/154911 [01:48<00:47, 1859.10 examples/s]"
      ]
     },
     {
@@ -120785,8 +17699,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10505/12318 [18:12:18<3:08:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10505/12318 [18:12:18<3:08:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  44%|███▍    | 67673/154911 [01:48<00:44, 1947.24 examples/s]"
      ]
     },
     {
@@ -120794,8 +17707,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10506/12318 [18:12:22<3:08:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10506/12318 [18:12:22<3:08:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  44%|███▌    | 67917/154911 [01:48<00:41, 2087.85 examples/s]"
      ]
     },
     {
@@ -120803,8 +17715,9 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10507/12318 [18:12:29<3:08:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10507/12318 [18:12:29<3:08:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  44%|███▌    | 68130/154911 [01:48<00:42, 2051.63 examples/s][rank: 6] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 6, MEMBER: 7/8\r\n",
+      "[2023-09-02 06:20:52,577] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
      ]
     },
     {
@@ -120812,8 +17725,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10508/12318 [18:12:33<3:08:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10508/12318 [18:12:33<3:08:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  44%|███▌    | 68338/154911 [01:48<00:43, 1980.04 examples/s]"
      ]
     },
     {
@@ -120821,8 +17733,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10509/12318 [18:12:36<3:08:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10509/12318 [18:12:36<3:08:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  44%|███▌    | 68538/154911 [01:48<00:44, 1921.03 examples/s]"
      ]
     },
     {
@@ -120830,8 +17741,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10510/12318 [18:12:45<3:07:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10510/12318 [18:12:45<3:07:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  44%|███▌    | 68731/154911 [01:48<00:45, 1904.71 examples/s]"
      ]
     },
     {
@@ -120839,8 +17749,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10511/12318 [18:12:47<3:07:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10511/12318 [18:12:47<3:07:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  44%|███▌    | 68923/154911 [01:48<00:45, 1900.74 examples/s]"
      ]
     },
     {
@@ -120848,8 +17757,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10512/12318 [18:12:49<3:07:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10512/12318 [18:12:49<3:07:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  45%|███▌    | 69114/154911 [01:48<00:47, 1792.76 examples/s]"
      ]
     },
     {
@@ -120857,8 +17765,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10513/12318 [18:12:52<3:07:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10513/12318 [18:12:52<3:07:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  45%|███▌    | 69325/154911 [01:49<00:45, 1880.31 examples/s]"
      ]
     },
     {
@@ -120866,8 +17773,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10514/12318 [18:12:59<3:07:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10514/12318 [18:12:59<3:07:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  45%|███▌    | 69541/154911 [01:49<00:43, 1945.93 examples/s]"
      ]
     },
     {
@@ -120875,8 +17781,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10515/12318 [18:13:04<3:07:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10515/12318 [18:13:04<3:07:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  45%|███▌    | 69742/154911 [01:49<00:43, 1951.22 examples/s]"
      ]
     },
     {
@@ -120884,8 +17789,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10516/12318 [18:13:10<3:07:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10516/12318 [18:13:10<3:07:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  45%|███▌    | 69943/154911 [01:49<00:43, 1965.71 examples/s]"
      ]
     },
     {
@@ -120893,8 +17797,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10517/12318 [18:13:17<3:07:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10517/12318 [18:13:17<3:07:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  45%|███▌    | 70143/154911 [01:49<00:43, 1964.10 examples/s]"
      ]
     },
     {
@@ -120902,8 +17805,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10518/12318 [18:13:26<3:07:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10518/12318 [18:13:26<3:07:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  45%|███▋    | 70342/154911 [01:49<00:44, 1903.89 examples/s]"
      ]
     },
     {
@@ -120911,8 +17813,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10519/12318 [18:13:32<3:07:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10519/12318 [18:13:32<3:07:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  46%|███▋    | 70536/154911 [01:49<00:46, 1831.11 examples/s]"
      ]
     },
     {
@@ -120920,7 +17821,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10520/12318 [18:13:40<3:06:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  46%|███▋    | 70782/154911 [01:49<00:46, 1827.92 examples/s]"
      ]
     },
     {
@@ -120928,7 +17829,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10520/12318 [18:13:40<3:06:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  46%|███▋    | 70966/154911 [01:49<00:46, 1813.32 examples/s]"
      ]
     },
     {
@@ -120936,8 +17837,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10521/12318 [18:13:48<3:06:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10521/12318 [18:13:48<3:06:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  46%|███▋    | 71148/154911 [01:50<00:46, 1792.59 examples/s]"
      ]
     },
     {
@@ -120945,8 +17845,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10522/12318 [18:13:49<3:06:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10522/12318 [18:13:49<3:06:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  46%|███▋    | 71328/154911 [01:50<00:46, 1785.47 examples/s]"
      ]
     },
     {
@@ -120954,8 +17853,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10523/12318 [18:13:52<3:06:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10523/12318 [18:13:52<3:06:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  46%|███▋    | 71507/154911 [01:50<00:48, 1717.45 examples/s]"
      ]
     },
     {
@@ -120963,8 +17861,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10524/12318 [18:13:56<3:06:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10524/12318 [18:13:56<3:06:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  46%|███▋    | 71681/154911 [01:50<00:48, 1717.25 examples/s]"
      ]
     },
     {
@@ -120972,8 +17869,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10525/12318 [18:14:01<3:06:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10525/12318 [18:14:01<3:06:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  46%|███▋    | 71858/154911 [01:50<00:47, 1732.26 examples/s]"
      ]
     },
     {
@@ -120981,8 +17877,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10526/12318 [18:14:06<3:06:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10526/12318 [18:14:06<3:06:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  47%|███▋    | 72049/154911 [01:50<00:46, 1765.74 examples/s]"
      ]
     },
     {
@@ -120990,8 +17885,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10527/12318 [18:14:09<3:06:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10527/12318 [18:14:09<3:06:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  47%|███▋    | 72247/154911 [01:50<00:45, 1824.79 examples/s]"
      ]
     },
     {
@@ -120999,8 +17893,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10528/12318 [18:15:03<3:06:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10528/12318 [18:15:03<3:06:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  47%|███▋    | 72452/154911 [01:50<00:43, 1890.00 examples/s]"
      ]
     },
     {
@@ -121008,8 +17901,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10529/12318 [18:15:05<3:06:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10529/12318 [18:15:05<3:06:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  47%|███▊    | 72711/154911 [01:50<00:39, 2085.19 examples/s]"
      ]
     },
     {
@@ -121017,8 +17909,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10530/12318 [18:15:10<3:05:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  85%|▊| 10530/12318 [18:15:10<3:05:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  47%|███▊    | 72922/154911 [01:50<00:39, 2083.69 examples/s]"
      ]
     },
     {
@@ -121026,7 +17917,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10531/12318 [18:15:18<3:05:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  47%|███▊    | 73131/154911 [01:51<00:41, 1951.81 examples/s]"
      ]
     },
     {
@@ -121034,7 +17925,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  85%|▊| 10531/12318 [18:15:18<3:05:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  47%|███▊    | 73354/154911 [01:51<00:40, 2028.44 examples/s]"
      ]
     },
     {
@@ -121042,8 +17933,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10532/12318 [18:15:23<3:05:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10532/12318 [18:15:23<3:05:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  47%|███▊    | 73559/154911 [01:51<00:40, 2020.53 examples/s]"
      ]
     },
     {
@@ -121051,8 +17941,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10533/12318 [18:15:29<3:05:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10533/12318 [18:15:29<3:05:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  48%|███▊    | 73764/154911 [01:51<00:41, 1967.39 examples/s]"
      ]
     },
     {
@@ -121060,8 +17949,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10534/12318 [18:15:34<3:05:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10534/12318 [18:15:34<3:05:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  48%|███▊    | 73963/154911 [01:51<00:42, 1907.37 examples/s]"
      ]
     },
     {
@@ -121069,8 +17957,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10535/12318 [18:15:43<3:05:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10535/12318 [18:15:43<3:05:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  48%|███▊    | 74155/154911 [01:51<00:43, 1845.01 examples/s]"
      ]
     },
     {
@@ -121078,8 +17965,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10536/12318 [18:15:44<3:05:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10536/12318 [18:15:44<3:05:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  48%|███▊    | 74341/154911 [01:51<00:44, 1818.10 examples/s]"
      ]
     },
     {
@@ -121087,8 +17973,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10537/12318 [18:15:51<3:05:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10537/12318 [18:15:51<3:05:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  48%|███▊    | 74525/154911 [01:51<00:45, 1775.73 examples/s]"
      ]
     },
     {
@@ -121096,8 +17981,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10538/12318 [18:15:57<3:05:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10538/12318 [18:15:57<3:05:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  48%|███▊    | 74703/154911 [01:51<00:46, 1741.28 examples/s]"
      ]
     },
     {
@@ -121105,8 +17989,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10539/12318 [18:16:00<3:05:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10539/12318 [18:16:00<3:05:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  48%|███▊    | 74878/154911 [01:52<00:46, 1730.39 examples/s]"
      ]
     },
     {
@@ -121114,8 +17997,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10540/12318 [18:16:02<3:04:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10540/12318 [18:16:02<3:04:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  48%|███▉    | 75072/154911 [01:52<00:44, 1781.59 examples/s]"
      ]
     },
     {
@@ -121123,8 +18005,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10541/12318 [18:16:04<3:04:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10541/12318 [18:16:04<3:04:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  49%|███▉    | 75251/154911 [01:52<00:46, 1726.95 examples/s]"
      ]
     },
     {
@@ -121132,8 +18013,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10542/12318 [18:16:10<3:04:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10542/12318 [18:16:10<3:04:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  49%|███▉    | 75429/154911 [01:52<00:45, 1739.81 examples/s]"
      ]
     },
     {
@@ -121141,8 +18021,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10543/12318 [18:16:19<3:04:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10543/12318 [18:16:19<3:04:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  49%|███▉    | 75618/154911 [01:52<00:44, 1772.17 examples/s]"
      ]
     },
     {
@@ -121150,8 +18029,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10544/12318 [18:16:21<3:04:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10544/12318 [18:16:21<3:04:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  49%|███▉    | 75816/154911 [01:52<00:43, 1829.90 examples/s]"
      ]
     },
     {
@@ -121159,8 +18037,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10545/12318 [18:16:28<3:04:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10545/12318 [18:16:28<3:04:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  49%|███▉    | 76105/154911 [01:52<00:37, 2127.31 examples/s]"
      ]
     },
     {
@@ -121168,8 +18045,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10546/12318 [18:16:35<3:04:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10546/12318 [18:16:35<3:04:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  49%|███▉    | 76320/154911 [01:52<00:38, 2029.03 examples/s]"
      ]
     },
     {
@@ -121177,8 +18053,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10547/12318 [18:16:39<3:04:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10547/12318 [18:16:39<3:04:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  49%|███▉    | 76533/154911 [01:52<00:39, 1989.81 examples/s]"
      ]
     },
     {
@@ -121186,7 +18061,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10548/12318 [18:16:47<3:04:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  50%|███▉    | 76748/154911 [01:52<00:38, 2033.12 examples/s]"
      ]
     },
     {
@@ -121194,7 +18069,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10548/12318 [18:16:47<3:04:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  50%|███▉    | 76953/154911 [01:53<00:39, 1980.75 examples/s]"
      ]
     },
     {
@@ -121202,8 +18077,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10549/12318 [18:16:52<3:03:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10549/12318 [18:16:52<3:03:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  50%|███▉    | 77152/154911 [01:53<00:39, 1965.88 examples/s]"
      ]
     },
     {
@@ -121211,8 +18085,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10550/12318 [18:16:56<3:03:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10550/12318 [18:16:56<3:03:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  50%|███▉    | 77351/154911 [01:53<00:40, 1905.73 examples/s]"
      ]
     },
     {
@@ -121220,8 +18093,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10551/12318 [18:16:59<3:03:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10551/12318 [18:16:59<3:03:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  50%|████    | 77544/154911 [01:53<00:42, 1829.68 examples/s]"
      ]
     },
     {
@@ -121229,8 +18101,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10552/12318 [18:17:02<3:03:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10552/12318 [18:17:02<3:03:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  50%|████    | 77729/154911 [01:53<00:43, 1783.92 examples/s]"
      ]
     },
     {
@@ -121238,8 +18109,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10553/12318 [18:17:09<3:03:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10553/12318 [18:17:09<3:03:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  50%|████    | 77940/154911 [01:53<00:41, 1874.49 examples/s]"
      ]
     },
     {
@@ -121247,8 +18117,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10554/12318 [18:17:13<3:03:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10554/12318 [18:17:13<3:03:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  50%|████    | 78130/154911 [01:53<00:41, 1863.75 examples/s]"
      ]
     },
     {
@@ -121256,8 +18125,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10555/12318 [18:17:20<3:03:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10555/12318 [18:17:20<3:03:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  51%|████    | 78357/154911 [01:53<00:38, 1974.82 examples/s]"
      ]
     },
     {
@@ -121265,7 +18133,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10556/12318 [18:17:22<3:03:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  51%|████    | 78559/154911 [01:53<00:43, 1774.37 examples/s]"
      ]
     },
     {
@@ -121273,7 +18141,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10556/12318 [18:17:22<3:03:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  51%|████    | 78758/154911 [01:54<00:41, 1828.50 examples/s]"
      ]
     },
     {
@@ -121281,8 +18149,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10557/12318 [18:17:23<3:03:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10557/12318 [18:17:23<3:03:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  51%|████    | 78946/154911 [01:54<00:41, 1829.71 examples/s]"
      ]
     },
     {
@@ -121290,8 +18157,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10558/12318 [18:17:32<3:02:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10558/12318 [18:17:32<3:02:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  51%|████    | 79133/154911 [01:54<00:43, 1760.41 examples/s]"
      ]
     },
     {
@@ -121299,8 +18165,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10559/12318 [18:17:38<3:02:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10559/12318 [18:17:38<3:02:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  51%|████    | 79313/154911 [01:54<00:43, 1754.97 examples/s]"
      ]
     },
     {
@@ -121308,8 +18173,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10560/12318 [18:18:22<3:02:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10560/12318 [18:18:22<3:02:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  51%|████    | 79490/154911 [01:54<00:43, 1743.22 examples/s]"
      ]
     },
     {
@@ -121317,8 +18181,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10561/12318 [18:18:26<3:02:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10561/12318 [18:18:26<3:02:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  51%|████    | 79668/154911 [01:54<00:44, 1696.09 examples/s]"
      ]
     },
     {
@@ -121326,8 +18189,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10562/12318 [18:18:32<3:02:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10562/12318 [18:18:32<3:02:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  52%|████    | 79870/154911 [01:54<00:42, 1777.90 examples/s]"
      ]
     },
     {
@@ -121335,8 +18197,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10563/12318 [18:18:41<3:02:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10563/12318 [18:18:41<3:02:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  52%|████▏   | 80050/154911 [01:54<00:42, 1762.60 examples/s]"
      ]
     },
     {
@@ -121344,8 +18205,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10564/12318 [18:18:47<3:02:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10564/12318 [18:18:47<3:02:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  52%|████▏   | 80227/154911 [01:54<00:42, 1759.08 examples/s]"
      ]
     },
     {
@@ -121353,8 +18213,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10565/12318 [18:18:55<3:02:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10565/12318 [18:18:55<3:02:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  52%|████▏   | 80404/154911 [01:55<00:44, 1678.25 examples/s]"
      ]
     },
     {
@@ -121362,8 +18221,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10566/12318 [18:18:56<3:02:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10566/12318 [18:18:56<3:02:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  52%|████▏   | 80573/154911 [01:55<00:44, 1670.37 examples/s]"
      ]
     },
     {
@@ -121371,8 +18229,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10567/12318 [18:19:03<3:02:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10567/12318 [18:19:03<3:02:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  52%|████▏   | 80752/154911 [01:55<00:43, 1692.46 examples/s]"
      ]
     },
     {
@@ -121380,8 +18237,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10568/12318 [18:19:09<3:02:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10568/12318 [18:19:09<3:02:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  52%|████▏   | 80922/154911 [01:55<00:44, 1680.77 examples/s]"
      ]
     },
     {
@@ -121389,8 +18245,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10569/12318 [18:19:18<3:01:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10569/12318 [18:19:18<3:01:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  52%|████▏   | 81127/154911 [01:55<00:41, 1782.03 examples/s]"
      ]
     },
     {
@@ -121398,8 +18253,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10570/12318 [18:19:19<3:01:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|���| 10570/12318 [18:19:19<3:01:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  52%|████▏   | 81311/154911 [01:55<00:41, 1791.54 examples/s]"
      ]
     },
     {
@@ -121407,8 +18261,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10571/12318 [18:19:26<3:01:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10571/12318 [18:19:26<3:01:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  53%|████▏   | 81491/154911 [01:55<00:43, 1680.24 examples/s]"
      ]
     },
     {
@@ -121416,8 +18269,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10572/12318 [18:19:29<3:01:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10572/12318 [18:19:29<3:01:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  53%|████▏   | 81716/154911 [01:55<00:39, 1836.82 examples/s]"
      ]
     },
     {
@@ -121425,8 +18277,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10573/12318 [18:19:30<3:01:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10573/12318 [18:19:30<3:01:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  53%|████▏   | 81910/154911 [01:55<00:39, 1849.53 examples/s]"
      ]
     },
     {
@@ -121434,8 +18285,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10574/12318 [18:19:33<3:01:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10574/12318 [18:19:33<3:01:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  53%|████▏   | 82098/154911 [01:55<00:41, 1744.14 examples/s]"
      ]
     },
     {
@@ -121443,8 +18293,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10575/12318 [18:19:39<3:01:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10575/12318 [18:19:39<3:01:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  53%|████▏   | 82276/154911 [01:56<00:44, 1637.54 examples/s]"
      ]
     },
     {
@@ -121452,8 +18301,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10576/12318 [18:19:47<3:01:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10576/12318 [18:19:47<3:01:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  53%|████▎   | 82443/154911 [01:56<00:46, 1549.73 examples/s]"
      ]
     },
     {
@@ -121461,8 +18309,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10577/12318 [18:19:55<3:01:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10577/12318 [18:19:55<3:01:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  53%|████▎   | 82601/154911 [01:56<00:48, 1488.60 examples/s]"
      ]
     },
     {
@@ -121470,8 +18317,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10578/12318 [18:19:59<3:00:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10578/12318 [18:19:59<3:00:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  53%|████▎   | 82754/154911 [01:56<00:50, 1440.65 examples/s]"
      ]
     },
     {
@@ -121479,8 +18325,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10579/12318 [18:20:05<3:00:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10579/12318 [18:20:05<3:00:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  54%|████▎   | 82901/154911 [01:56<00:51, 1405.05 examples/s]"
      ]
     },
     {
@@ -121488,8 +18333,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10580/12318 [18:20:13<3:00:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10580/12318 [18:20:13<3:00:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  54%|████▎   | 83054/154911 [01:56<00:50, 1436.22 examples/s]"
      ]
     },
     {
@@ -121497,8 +18341,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10581/12318 [18:20:16<3:00:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10581/12318 [18:20:16<3:00:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  54%|████▎   | 83269/154911 [01:56<00:44, 1620.84 examples/s]"
      ]
     },
     {
@@ -121506,8 +18349,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10582/12318 [18:20:25<3:00:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10582/12318 [18:20:25<3:00:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  54%|████▎   | 83507/154911 [01:56<00:38, 1835.19 examples/s]"
      ]
     },
     {
@@ -121515,8 +18357,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10583/12318 [18:20:27<3:00:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10583/12318 [18:20:27<3:00:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  54%|████▎   | 83696/154911 [01:56<00:38, 1837.86 examples/s]"
      ]
     },
     {
@@ -121524,8 +18365,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10584/12318 [18:20:31<3:00:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10584/12318 [18:20:31<3:00:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  54%|████▎   | 83883/154911 [01:57<00:42, 1662.08 examples/s]"
      ]
     },
     {
@@ -121533,8 +18373,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10585/12318 [18:20:40<3:00:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10585/12318 [18:20:40<3:00:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  54%|████▎   | 84056/154911 [01:57<00:44, 1592.40 examples/s]"
      ]
     },
     {
@@ -121542,8 +18381,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10586/12318 [18:20:47<3:00:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10586/12318 [18:20:47<3:00:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  54%|████▎   | 84220/154911 [01:57<00:46, 1530.76 examples/s]"
      ]
     },
     {
@@ -121551,8 +18389,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10587/12318 [18:20:52<2:59:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10587/12318 [18:20:52<2:59:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  54%|████▎   | 84377/154911 [01:57<00:46, 1527.13 examples/s]"
      ]
     },
     {
@@ -121560,8 +18397,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10588/12318 [18:20:57<2:59:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10588/12318 [18:20:57<2:59:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▎   | 84532/154911 [01:57<00:46, 1506.99 examples/s]"
      ]
     },
     {
@@ -121569,8 +18405,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10589/12318 [18:21:04<2:59:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10589/12318 [18:21:04<2:59:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▎   | 84686/154911 [01:57<00:47, 1481.85 examples/s]"
      ]
     },
     {
@@ -121578,8 +18413,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10590/12318 [18:21:12<2:59:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10590/12318 [18:21:12<2:59:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▍   | 84835/154911 [01:57<00:47, 1472.65 examples/s]"
      ]
     },
     {
@@ -121587,8 +18421,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10591/12318 [18:21:19<2:59:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10591/12318 [18:21:19<2:59:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▍   | 84995/154911 [01:57<00:46, 1505.76 examples/s]"
      ]
     },
     {
@@ -121596,8 +18429,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10592/12318 [18:21:41<2:59:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10592/12318 [18:21:41<2:59:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▍   | 85148/154911 [01:57<00:47, 1476.00 examples/s]"
      ]
     },
     {
@@ -121605,8 +18437,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10593/12318 [18:21:49<2:59:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10593/12318 [18:21:49<2:59:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▍   | 85310/154911 [01:58<00:45, 1513.26 examples/s]"
      ]
     },
     {
@@ -121614,8 +18445,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10594/12318 [18:21:58<2:59:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10594/12318 [18:21:58<2:59:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▍   | 85467/154911 [01:58<00:45, 1524.76 examples/s]"
      ]
     },
     {
@@ -121623,8 +18453,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10595/12318 [18:22:07<2:59:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10595/12318 [18:22:07<2:59:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▍   | 85634/154911 [01:58<00:44, 1549.59 examples/s]"
      ]
     },
     {
@@ -121632,8 +18461,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10596/12318 [18:22:11<2:59:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10596/12318 [18:22:11<2:59:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▍   | 85808/154911 [01:58<00:43, 1582.88 examples/s]"
      ]
     },
     {
@@ -121641,8 +18469,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10597/12318 [18:22:14<2:59:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10597/12318 [18:22:14<2:59:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  55%|████▍   | 85971/154911 [01:58<00:43, 1596.28 examples/s]"
      ]
     },
     {
@@ -121650,8 +18477,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10598/12318 [18:22:22<2:58:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10598/12318 [18:22:22<2:58:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  56%|████▍   | 86140/154911 [01:58<00:42, 1623.09 examples/s]"
      ]
     },
     {
@@ -121659,8 +18485,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10599/12318 [18:22:31<2:58:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10599/12318 [18:22:31<2:58:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  56%|████▍   | 86305/154911 [01:58<00:42, 1600.92 examples/s]"
      ]
     },
     {
@@ -121668,8 +18493,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10600/12318 [18:22:35<2:58:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10600/12318 [18:22:35<2:58:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  56%|████▍   | 86467/154911 [01:58<00:43, 1576.38 examples/s]"
      ]
     },
     {
@@ -121677,8 +18501,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10601/12318 [18:22:36<2:58:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10601/12318 [18:22:36<2:58:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  56%|████▍   | 86625/154911 [01:58<00:43, 1570.45 examples/s]"
      ]
     },
     {
@@ -121686,8 +18509,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10602/12318 [18:22:38<2:58:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10602/12318 [18:22:38<2:58:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  56%|████▍   | 86783/154911 [01:59<00:43, 1563.40 examples/s]"
      ]
     },
     {
@@ -121695,8 +18517,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10603/12318 [18:22:45<2:58:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10603/12318 [18:22:45<2:58:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  56%|████▍   | 86954/154911 [01:59<00:42, 1593.00 examples/s]"
      ]
     },
     {
@@ -121704,8 +18525,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10604/12318 [18:22:49<2:58:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10604/12318 [18:22:49<2:58:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  56%|████▍   | 87115/154911 [01:59<00:42, 1594.05 examples/s]"
      ]
     },
     {
@@ -121713,8 +18533,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10605/12318 [18:22:57<2:58:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10605/12318 [18:22:57<2:58:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  56%|████▌   | 87281/154911 [01:59<00:42, 1596.20 examples/s]"
      ]
     },
     {
@@ -121722,8 +18541,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10606/12318 [18:23:03<2:58:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10606/12318 [18:23:03<2:58:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  56%|████▌   | 87445/154911 [01:59<00:42, 1592.74 examples/s]"
      ]
     },
     {
@@ -121731,8 +18549,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10607/12318 [18:23:07<2:57:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10607/12318 [18:23:07<2:57:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  57%|████▌   | 87617/154911 [01:59<00:41, 1625.96 examples/s]"
      ]
     },
     {
@@ -121740,8 +18557,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10608/12318 [18:23:15<2:57:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10608/12318 [18:23:15<2:57:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  57%|████▌   | 87797/154911 [01:59<00:40, 1676.07 examples/s]"
      ]
     },
     {
@@ -121749,8 +18565,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10609/12318 [18:23:18<2:57:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10609/12318 [18:23:18<2:57:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  57%|████▌   | 87991/154911 [01:59<00:38, 1752.14 examples/s]"
      ]
     },
     {
@@ -121758,8 +18573,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10610/12318 [18:23:19<2:57:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10610/12318 [18:23:19<2:57:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  57%|████▌   | 88177/154911 [01:59<00:37, 1780.09 examples/s]"
      ]
     },
     {
@@ -121767,8 +18581,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10611/12318 [18:23:27<2:57:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10611/12318 [18:23:27<2:57:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  57%|████▌   | 88356/154911 [01:59<00:37, 1761.58 examples/s]"
      ]
     },
     {
@@ -121776,8 +18589,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10612/12318 [18:23:35<2:57:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10612/12318 [18:23:35<2:57:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  57%|████▌   | 88533/154911 [02:00<00:39, 1671.49 examples/s]"
      ]
     },
     {
@@ -121785,8 +18597,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10613/12318 [18:23:38<2:57:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10613/12318 [18:23:38<2:57:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  57%|████▌   | 88703/154911 [02:00<00:40, 1625.55 examples/s]"
      ]
     },
     {
@@ -121794,8 +18605,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10614/12318 [18:23:47<2:57:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10614/12318 [18:23:47<2:57:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  57%|████▌   | 88879/154911 [02:00<00:39, 1652.08 examples/s]"
      ]
     },
     {
@@ -121803,8 +18613,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10615/12318 [18:23:55<2:57:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10615/12318 [18:23:55<2:57:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  57%|████▌   | 89046/154911 [02:00<00:40, 1638.27 examples/s]"
      ]
     },
     {
@@ -121812,8 +18621,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10616/12318 [18:24:00<2:56:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10616/12318 [18:24:00<2:56:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  58%|████▌   | 89211/154911 [02:00<00:41, 1582.01 examples/s]"
      ]
     },
     {
@@ -121821,8 +18629,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10617/12318 [18:24:02<2:56:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10617/12318 [18:24:02<2:56:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  58%|████▌   | 89370/154911 [02:00<00:44, 1465.65 examples/s]"
      ]
     },
     {
@@ -121830,8 +18637,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10618/12318 [18:24:07<2:56:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10618/12318 [18:24:07<2:56:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  58%|████▌   | 89542/154911 [02:00<00:43, 1512.08 examples/s]"
      ]
     },
     {
@@ -121839,8 +18645,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10619/12318 [18:24:10<2:56:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10619/12318 [18:24:10<2:56:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  58%|████▋   | 89700/154911 [02:00<00:42, 1519.91 examples/s]"
      ]
     },
     {
@@ -121848,8 +18653,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10620/12318 [18:24:12<2:56:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10620/12318 [18:24:12<2:56:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  58%|████▋   | 89854/154911 [02:00<00:49, 1325.28 examples/s]"
      ]
     },
     {
@@ -121857,8 +18661,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10621/12318 [18:24:17<2:56:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10621/12318 [18:24:17<2:56:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  58%|████▋   | 90084/154911 [02:01<00:41, 1576.80 examples/s]"
      ]
     },
     {
@@ -121866,8 +18669,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10622/12318 [18:24:25<2:56:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10622/12318 [18:24:25<2:56:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  58%|████▋   | 90249/154911 [02:01<00:42, 1531.92 examples/s]"
      ]
     },
     {
@@ -121875,8 +18677,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10623/12318 [18:24:29<2:56:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10623/12318 [18:24:29<2:56:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  58%|████▋   | 90409/154911 [02:01<00:42, 1517.53 examples/s]"
      ]
     },
     {
@@ -121884,8 +18685,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10624/12318 [18:24:49<2:56:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10624/12318 [18:24:49<2:56:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  58%|████▋   | 90566/154911 [02:01<00:43, 1489.70 examples/s]"
      ]
     },
     {
@@ -121893,8 +18693,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10625/12318 [18:24:58<2:56:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10625/12318 [18:24:58<2:56:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  59%|████▋   | 90726/154911 [02:01<00:42, 1508.63 examples/s]"
      ]
     },
     {
@@ -121902,8 +18701,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10626/12318 [18:25:06<2:55:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10626/12318 [18:25:06<2:55:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  59%|████▋   | 90880/154911 [02:01<00:42, 1504.01 examples/s]"
      ]
     },
     {
@@ -121911,8 +18709,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10627/12318 [18:25:11<2:55:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10627/12318 [18:25:11<2:55:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  59%|████▋   | 91034/154911 [02:01<00:42, 1511.93 examples/s]"
      ]
     },
     {
@@ -121920,8 +18717,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10628/12318 [18:25:14<2:55:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10628/12318 [18:25:14<2:55:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  59%|████▋   | 91187/154911 [02:01<00:42, 1492.87 examples/s]"
      ]
     },
     {
@@ -121929,8 +18725,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10629/12318 [18:25:19<2:55:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10629/12318 [18:25:19<2:55:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  59%|████▋   | 91338/154911 [02:01<00:42, 1490.56 examples/s]"
      ]
     },
     {
@@ -121938,8 +18733,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10630/12318 [18:25:28<2:55:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10630/12318 [18:25:28<2:55:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  59%|████▋   | 91488/154911 [02:02<00:42, 1482.38 examples/s]"
      ]
     },
     {
@@ -121947,8 +18741,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10631/12318 [18:25:31<2:55:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10631/12318 [18:25:31<2:55:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  59%|████▋   | 91667/154911 [02:02<00:40, 1572.22 examples/s]"
      ]
     },
     {
@@ -121956,8 +18749,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10632/12318 [18:25:37<2:55:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10632/12318 [18:25:37<2:55:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  59%|████▋   | 91857/154911 [02:02<00:37, 1667.79 examples/s]"
      ]
     },
     {
@@ -121965,8 +18757,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10633/12318 [18:25:42<2:55:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10633/12318 [18:25:42<2:55:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  59%|████▊   | 92025/154911 [02:02<00:38, 1644.47 examples/s]"
      ]
     },
     {
@@ -121974,8 +18765,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10634/12318 [18:25:51<2:55:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10634/12318 [18:25:51<2:55:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 92190/154911 [02:02<00:40, 1542.58 examples/s]"
      ]
     },
     {
@@ -121983,8 +18773,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10635/12318 [18:25:53<2:55:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10635/12318 [18:25:53<2:55:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 92346/154911 [02:02<00:40, 1541.82 examples/s]"
      ]
     },
     {
@@ -121992,8 +18781,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10636/12318 [18:25:59<2:54:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10636/12318 [18:25:59<2:54:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 92503/154911 [02:02<00:40, 1525.01 examples/s]"
      ]
     },
     {
@@ -122001,8 +18789,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10637/12318 [18:26:04<2:54:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10637/12318 [18:26:04<2:54:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 92658/154911 [02:02<00:41, 1495.02 examples/s]"
      ]
     },
     {
@@ -122010,8 +18797,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10638/12318 [18:26:06<2:54:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10638/12318 [18:26:06<2:54:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 92810/154911 [02:02<00:41, 1484.21 examples/s]"
      ]
     },
     {
@@ -122019,8 +18805,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10639/12318 [18:26:08<2:54:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10639/12318 [18:26:08<2:54:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 92961/154911 [02:02<00:42, 1465.87 examples/s]"
      ]
     },
     {
@@ -122028,8 +18813,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10640/12318 [18:26:15<2:54:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10640/12318 [18:26:15<2:54:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 93110/154911 [02:03<00:42, 1463.40 examples/s]"
      ]
     },
     {
@@ -122037,8 +18821,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10641/12318 [18:26:24<2:54:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10641/12318 [18:26:24<2:54:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 93266/154911 [02:03<00:41, 1485.90 examples/s]"
      ]
     },
     {
@@ -122046,8 +18829,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10642/12318 [18:26:33<2:54:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10642/12318 [18:26:33<2:54:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 93425/154911 [02:03<00:40, 1513.86 examples/s]"
      ]
     },
     {
@@ -122055,8 +18837,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10643/12318 [18:26:37<2:54:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10643/12318 [18:26:37<2:54:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  60%|████▊   | 93578/154911 [02:03<00:40, 1502.30 examples/s]"
      ]
     },
     {
@@ -122064,8 +18845,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10644/12318 [18:26:45<2:54:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10644/12318 [18:26:45<2:54:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▊   | 93729/154911 [02:03<00:42, 1453.80 examples/s]"
      ]
     },
     {
@@ -122073,8 +18853,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10645/12318 [18:26:54<2:53:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10645/12318 [18:26:54<2:53:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▊   | 93886/154911 [02:03<00:41, 1475.62 examples/s]"
      ]
     },
     {
@@ -122082,8 +18861,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10646/12318 [18:26:59<2:53:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10646/12318 [18:26:59<2:53:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▊   | 94036/154911 [02:03<00:41, 1482.65 examples/s]"
      ]
     },
     {
@@ -122091,8 +18869,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10647/12318 [18:27:06<2:53:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10647/12318 [18:27:06<2:53:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▊   | 94191/154911 [02:03<00:41, 1473.45 examples/s]"
      ]
     },
     {
@@ -122100,8 +18877,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10648/12318 [18:27:10<2:53:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10648/12318 [18:27:10<2:53:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▊   | 94379/154911 [02:03<00:38, 1585.69 examples/s]"
      ]
     },
     {
@@ -122109,8 +18885,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10649/12318 [18:27:16<2:53:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10649/12318 [18:27:16<2:53:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▉   | 94578/154911 [02:04<00:35, 1703.69 examples/s]"
      ]
     },
     {
@@ -122118,8 +18893,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10650/12318 [18:27:21<2:53:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10650/12318 [18:27:21<2:53:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▉   | 94749/154911 [02:04<00:35, 1686.10 examples/s]"
      ]
     },
     {
@@ -122127,7 +18901,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10651/12318 [18:27:27<2:53:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▉   | 94920/154911 [02:04<00:37, 1620.00 examples/s]"
      ]
     },
     {
@@ -122135,7 +18909,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10651/12318 [18:27:27<2:53:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▉   | 95083/154911 [02:04<00:37, 1585.29 examples/s]"
      ]
     },
     {
@@ -122143,8 +18917,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10652/12318 [18:27:34<2:53:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10652/12318 [18:27:34<2:53:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  61%|████▉   | 95243/154911 [02:04<00:38, 1546.90 examples/s]"
      ]
     },
     {
@@ -122152,8 +18925,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10653/12318 [18:27:41<2:53:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10653/12318 [18:27:41<2:53:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 95401/154911 [02:04<00:39, 1501.84 examples/s]"
      ]
     },
     {
@@ -122161,8 +18933,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10654/12318 [18:27:50<2:53:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10654/12318 [18:27:50<2:53:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 95553/154911 [02:04<00:40, 1478.52 examples/s]"
      ]
     },
     {
@@ -122170,8 +18941,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  86%|▊| 10655/12318 [18:27:51<2:52:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  86%|▊| 10655/12318 [18:27:51<2:52:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 95702/154911 [02:04<00:41, 1411.40 examples/s]"
      ]
     },
     {
@@ -122179,8 +18949,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10656/12318 [18:28:08<2:52:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10656/12318 [18:28:08<2:52:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 95844/154911 [02:04<00:42, 1395.37 examples/s]"
      ]
     },
     {
@@ -122188,8 +18957,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10657/12318 [18:28:14<2:52:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10657/12318 [18:28:14<2:52:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 95984/154911 [02:05<00:42, 1380.96 examples/s]"
      ]
     },
     {
@@ -122197,8 +18965,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10658/12318 [18:28:23<2:52:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10658/12318 [18:28:23<2:52:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 96123/154911 [02:05<00:43, 1349.73 examples/s]"
      ]
     },
     {
@@ -122206,8 +18973,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10659/12318 [18:28:30<2:52:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10659/12318 [18:28:30<2:52:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 96266/154911 [02:05<00:43, 1363.03 examples/s]"
      ]
     },
     {
@@ -122215,8 +18981,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10660/12318 [18:28:33<2:52:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10660/12318 [18:28:33<2:52:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 96406/154911 [02:05<00:42, 1367.95 examples/s]"
      ]
     },
     {
@@ -122224,7 +18989,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10661/12318 [18:28:42<2:52:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 96553/154911 [02:05<00:41, 1392.38 examples/s]"
      ]
     },
     {
@@ -122232,7 +18997,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10661/12318 [18:28:42<2:52:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  62%|████▉   | 96693/154911 [02:05<00:45, 1282.45 examples/s]"
      ]
     },
     {
@@ -122240,8 +19005,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10662/12318 [18:28:48<2:52:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10662/12318 [18:28:48<2:52:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 96851/154911 [02:05<00:42, 1364.03 examples/s]"
      ]
     },
     {
@@ -122249,8 +19013,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10663/12318 [18:28:57<2:52:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10663/12318 [18:28:57<2:52:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 96991/154911 [02:05<00:43, 1328.59 examples/s]"
      ]
     },
     {
@@ -122258,8 +19021,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10664/12318 [18:29:03<2:52:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10664/12318 [18:29:03<2:52:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 97189/154911 [02:05<00:38, 1510.92 examples/s]"
      ]
     },
     {
@@ -122267,8 +19029,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10665/12318 [18:29:11<2:51:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10665/12318 [18:29:11<2:51:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 97343/154911 [02:05<00:37, 1515.95 examples/s]"
      ]
     },
     {
@@ -122276,8 +19037,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10666/12318 [18:29:20<2:51:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10666/12318 [18:29:20<2:51:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 97498/154911 [02:06<00:38, 1477.57 examples/s]"
      ]
     },
     {
@@ -122285,8 +19045,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10667/12318 [18:29:27<2:51:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10667/12318 [18:29:27<2:51:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 97647/154911 [02:06<00:39, 1439.05 examples/s]"
      ]
     },
     {
@@ -122294,8 +19053,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10668/12318 [18:29:31<2:51:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10668/12318 [18:29:31<2:51:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 97792/154911 [02:06<00:41, 1365.07 examples/s]"
      ]
     },
     {
@@ -122303,8 +19061,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10669/12318 [18:29:38<2:51:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10669/12318 [18:29:38<2:51:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 97936/154911 [02:06<00:41, 1385.61 examples/s]"
      ]
     },
     {
@@ -122312,8 +19069,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10670/12318 [18:29:43<2:51:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10670/12318 [18:29:43<2:51:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 98076/154911 [02:06<00:41, 1366.94 examples/s]"
      ]
     },
     {
@@ -122321,8 +19077,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10671/12318 [18:29:49<2:51:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10671/12318 [18:29:49<2:51:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 98215/154911 [02:06<00:41, 1351.15 examples/s]"
      ]
     },
     {
@@ -122330,8 +19085,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10672/12318 [18:29:57<2:51:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10672/12318 [18:29:57<2:51:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  63%|█████   | 98352/154911 [02:06<00:41, 1349.06 examples/s]"
      ]
     },
     {
@@ -122339,8 +19093,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10673/12318 [18:30:05<2:51:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10673/12318 [18:30:05<2:51:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████   | 98488/154911 [02:06<00:43, 1311.52 examples/s]"
      ]
     },
     {
@@ -122348,8 +19101,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10674/12318 [18:30:12<2:50:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10674/12318 [18:30:12<2:50:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████   | 98620/154911 [02:06<00:43, 1303.85 examples/s]"
      ]
     },
     {
@@ -122357,8 +19109,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10675/12318 [18:30:19<2:50:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10675/12318 [18:30:19<2:50:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████   | 98752/154911 [02:07<00:44, 1271.47 examples/s]"
      ]
     },
     {
@@ -122366,8 +19117,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10676/12318 [18:30:23<2:50:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10676/12318 [18:30:23<2:50:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████   | 98896/154911 [02:07<00:42, 1312.79 examples/s]"
      ]
     },
     {
@@ -122375,7 +19125,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10677/12318 [18:30:32<2:50:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████   | 99062/154911 [02:07<00:39, 1411.99 examples/s]"
      ]
     },
     {
@@ -122383,7 +19133,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10677/12318 [18:30:32<2:50:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████▏  | 99276/154911 [02:07<00:34, 1624.06 examples/s]"
      ]
     },
     {
@@ -122391,8 +19141,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10678/12318 [18:30:37<2:50:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10678/12318 [18:30:37<2:50:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████▏  | 99441/154911 [02:07<00:35, 1559.63 examples/s]"
      ]
     },
     {
@@ -122400,8 +19149,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10679/12318 [18:30:39<2:50:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10679/12318 [18:30:39<2:50:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████▏  | 99600/154911 [02:07<00:35, 1557.79 examples/s]"
      ]
     },
     {
@@ -122409,8 +19157,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10680/12318 [18:30:40<2:50:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10680/12318 [18:30:40<2:50:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████▏  | 99758/154911 [02:07<00:37, 1466.69 examples/s]"
      ]
     },
     {
@@ -122418,8 +19165,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10681/12318 [18:30:46<2:50:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10681/12318 [18:30:46<2:50:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  64%|█████▏  | 99908/154911 [02:07<00:42, 1295.04 examples/s]"
      ]
     },
     {
@@ -122427,8 +19173,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10682/12318 [18:30:54<2:50:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10682/12318 [18:30:54<2:50:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 100065/154911 [02:07<00:40, 1362.58 examples/s]"
      ]
     },
     {
@@ -122436,8 +19181,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10683/12318 [18:31:01<2:50:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10683/12318 [18:31:01<2:50:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 100206/154911 [02:08<00:40, 1353.57 examples/s]"
      ]
     },
     {
@@ -122445,8 +19189,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10684/12318 [18:31:07<2:49:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10684/12318 [18:31:07<2:49:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 100346/154911 [02:08<00:40, 1343.47 examples/s]"
      ]
     },
     {
@@ -122454,8 +19197,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10685/12318 [18:31:13<2:49:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10685/12318 [18:31:13<2:49:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 100484/154911 [02:08<00:40, 1332.42 examples/s]"
      ]
     },
     {
@@ -122463,8 +19205,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10686/12318 [18:31:17<2:49:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10686/12318 [18:31:17<2:49:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 100626/154911 [02:08<00:40, 1339.71 examples/s]"
      ]
     },
     {
@@ -122472,8 +19213,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10687/12318 [18:31:23<2:49:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10687/12318 [18:31:23<2:49:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 100771/154911 [02:08<00:39, 1356.17 examples/s]"
      ]
     },
     {
@@ -122481,8 +19221,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10688/12318 [18:31:39<2:49:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10688/12318 [18:31:39<2:49:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 100927/154911 [02:08<00:38, 1405.73 examples/s]"
      ]
     },
     {
@@ -122490,8 +19229,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10689/12318 [18:31:46<2:49:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10689/12318 [18:31:46<2:49:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 101071/154911 [02:08<00:39, 1377.80 examples/s]"
      ]
     },
     {
@@ -122499,8 +19237,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10690/12318 [18:31:53<2:49:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10690/12318 [18:31:53<2:49:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 101212/154911 [02:08<00:39, 1361.64 examples/s]"
      ]
     },
     {
@@ -122508,8 +19245,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10691/12318 [18:32:00<2:49:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10691/12318 [18:32:00<2:49:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  65%|████▌  | 101363/154911 [02:08<00:38, 1401.00 examples/s]"
      ]
     },
     {
@@ -122517,8 +19253,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10692/12318 [18:32:04<2:49:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10692/12318 [18:32:04<2:49:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▌  | 101505/154911 [02:08<00:38, 1397.59 examples/s]"
      ]
     },
     {
@@ -122526,8 +19261,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10693/12318 [18:32:10<2:49:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10693/12318 [18:32:10<2:49:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▌  | 101663/154911 [02:09<00:37, 1433.53 examples/s]"
      ]
     },
     {
@@ -122535,8 +19269,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10694/12318 [18:32:15<2:48:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10694/12318 [18:32:15<2:48:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▌  | 101809/154911 [02:09<00:37, 1425.22 examples/s]"
      ]
     },
     {
@@ -122544,8 +19277,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10695/12318 [18:32:24<2:48:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10695/12318 [18:32:24<2:48:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▌  | 101965/154911 [02:09<00:36, 1464.37 examples/s]"
      ]
     },
     {
@@ -122553,8 +19285,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10696/12318 [18:32:26<2:48:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10696/12318 [18:32:26<2:48:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▌  | 102145/154911 [02:09<00:33, 1557.70 examples/s]"
      ]
     },
     {
@@ -122562,8 +19293,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10697/12318 [18:32:33<2:48:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10697/12318 [18:32:33<2:48:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▌  | 102306/154911 [02:09<00:34, 1542.62 examples/s]"
      ]
     },
     {
@@ -122571,8 +19301,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10698/12318 [18:32:39<2:48:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10698/12318 [18:32:39<2:48:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▋  | 102463/154911 [02:09<00:34, 1537.34 examples/s]"
      ]
     },
     {
@@ -122580,8 +19309,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10699/12318 [18:32:46<2:48:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10699/12318 [18:32:46<2:48:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▋  | 102617/154911 [02:09<00:35, 1475.74 examples/s]"
      ]
     },
     {
@@ -122589,8 +19317,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10700/12318 [18:32:54<2:48:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10700/12318 [18:32:54<2:48:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▋  | 102766/154911 [02:09<00:36, 1410.52 examples/s]"
      ]
     },
     {
@@ -122598,8 +19325,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10701/12318 [18:33:01<2:48:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10701/12318 [18:33:01<2:48:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  66%|████▋  | 102909/154911 [02:09<00:37, 1390.21 examples/s]"
      ]
     },
     {
@@ -122607,8 +19333,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10702/12318 [18:33:07<2:48:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10702/12318 [18:33:07<2:48:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 103050/154911 [02:10<00:37, 1371.63 examples/s]"
      ]
     },
     {
@@ -122616,8 +19341,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10703/12318 [18:33:11<2:47:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10703/12318 [18:33:11<2:47:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 103188/154911 [02:10<00:38, 1351.41 examples/s]"
      ]
     },
     {
@@ -122625,8 +19349,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10704/12318 [18:33:13<2:47:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10704/12318 [18:33:13<2:47:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 103324/154911 [02:10<00:38, 1351.34 examples/s]"
      ]
     },
     {
@@ -122634,8 +19357,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10705/12318 [18:33:17<2:47:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10705/12318 [18:33:17<2:47:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 103460/154911 [02:10<00:38, 1342.94 examples/s]"
      ]
     },
     {
@@ -122643,8 +19365,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10706/12318 [18:33:21<2:47:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10706/12318 [18:33:21<2:47:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 103599/154911 [02:10<00:38, 1338.61 examples/s]"
      ]
     },
     {
@@ -122652,8 +19373,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10707/12318 [18:33:30<2:47:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10707/12318 [18:33:30<2:47:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 103746/154911 [02:10<00:37, 1373.35 examples/s]"
      ]
     },
     {
@@ -122661,8 +19381,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10708/12318 [18:33:37<2:47:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10708/12318 [18:33:37<2:47:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 103886/154911 [02:10<00:37, 1378.98 examples/s]"
      ]
     },
     {
@@ -122670,8 +19389,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10709/12318 [18:33:41<2:47:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10709/12318 [18:33:41<2:47:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 104030/154911 [02:10<00:36, 1395.66 examples/s]"
      ]
     },
     {
@@ -122679,8 +19397,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10710/12318 [18:33:44<2:47:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10710/12318 [18:33:44<2:47:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 104170/154911 [02:10<00:36, 1388.16 examples/s]"
      ]
     },
     {
@@ -122688,8 +19405,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10711/12318 [18:33:52<2:47:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|���| 10711/12318 [18:33:52<2:47:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 104320/154911 [02:10<00:35, 1416.94 examples/s]"
      ]
     },
     {
@@ -122697,8 +19413,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10712/12318 [18:34:00<2:47:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10712/12318 [18:34:00<2:47:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  67%|████▋  | 104473/154911 [02:11<00:34, 1447.98 examples/s]"
      ]
     },
     {
@@ -122706,8 +19421,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10713/12318 [18:34:07<2:46:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10713/12318 [18:34:07<2:46:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▋  | 104620/154911 [02:11<00:34, 1448.48 examples/s]"
      ]
     },
     {
@@ -122715,8 +19429,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10714/12318 [18:34:11<2:46:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10714/12318 [18:34:11<2:46:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▋  | 104767/154911 [02:11<00:34, 1454.55 examples/s]"
      ]
     },
     {
@@ -122724,8 +19437,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10715/12318 [18:34:17<2:46:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10715/12318 [18:34:17<2:46:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▋  | 104921/154911 [02:11<00:33, 1477.31 examples/s]"
      ]
     },
     {
@@ -122733,8 +19445,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10716/12318 [18:34:26<2:46:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10716/12318 [18:34:26<2:46:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▋  | 105092/154911 [02:11<00:32, 1546.05 examples/s]"
      ]
     },
     {
@@ -122742,8 +19453,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10717/12318 [18:34:28<2:46:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10717/12318 [18:34:28<2:46:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▊  | 105247/154911 [02:11<00:33, 1482.93 examples/s]"
      ]
     },
     {
@@ -122751,8 +19461,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10718/12318 [18:34:32<2:46:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10718/12318 [18:34:32<2:46:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▊  | 105397/154911 [02:11<00:34, 1446.17 examples/s]"
      ]
     },
     {
@@ -122760,8 +19469,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10719/12318 [18:34:40<2:46:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10719/12318 [18:34:40<2:46:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▊  | 105544/154911 [02:11<00:34, 1414.36 examples/s]"
      ]
     },
     {
@@ -122769,8 +19477,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10720/12318 [18:34:55<2:46:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10720/12318 [18:34:55<2:46:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▊  | 105690/154911 [02:11<00:34, 1421.97 examples/s]"
      ]
     },
     {
@@ -122778,8 +19485,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10721/12318 [18:35:03<2:46:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10721/12318 [18:35:03<2:46:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▊  | 105834/154911 [02:12<00:34, 1409.08 examples/s]"
      ]
     },
     {
@@ -122787,8 +19493,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10722/12318 [18:35:07<2:45:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10722/12318 [18:35:07<2:45:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  68%|████▊  | 105976/154911 [02:12<00:35, 1367.81 examples/s]"
      ]
     },
     {
@@ -122796,8 +19501,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10723/12318 [18:35:13<2:45:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10723/12318 [18:35:13<2:45:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 106136/154911 [02:12<00:34, 1416.65 examples/s]"
      ]
     },
     {
@@ -122805,8 +19509,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10724/12318 [18:35:22<2:45:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10724/12318 [18:35:22<2:45:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 106311/154911 [02:12<00:32, 1509.62 examples/s]"
      ]
     },
     {
@@ -122814,8 +19517,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10725/12318 [18:35:28<2:45:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10725/12318 [18:35:28<2:45:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 106470/154911 [02:12<00:31, 1527.54 examples/s]"
      ]
     },
     {
@@ -122823,8 +19525,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10726/12318 [18:35:29<2:45:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10726/12318 [18:35:29<2:45:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 106626/154911 [02:12<00:33, 1428.87 examples/s]"
      ]
     },
     {
@@ -122832,8 +19533,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10727/12318 [18:35:31<2:45:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10727/12318 [18:35:31<2:45:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 106778/154911 [02:12<00:33, 1450.90 examples/s]"
      ]
     },
     {
@@ -122841,8 +19541,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10728/12318 [18:35:36<2:45:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10728/12318 [18:35:36<2:45:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 106937/154911 [02:12<00:32, 1470.58 examples/s]"
      ]
     },
     {
@@ -122850,8 +19549,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10729/12318 [18:35:41<2:45:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10729/12318 [18:35:41<2:45:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 107105/154911 [02:12<00:31, 1509.13 examples/s]"
      ]
     },
     {
@@ -122859,8 +19557,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10730/12318 [18:35:46<2:45:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10730/12318 [18:35:46<2:45:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 107258/154911 [02:12<00:33, 1438.13 examples/s]"
      ]
     },
     {
@@ -122868,8 +19565,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10731/12318 [18:35:55<2:45:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10731/12318 [18:35:55<2:45:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 107403/154911 [02:13<00:33, 1406.62 examples/s]"
      ]
     },
     {
@@ -122877,8 +19573,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10732/12318 [18:35:59<2:44:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10732/12318 [18:35:59<2:44:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  69%|████▊  | 107546/154911 [02:13<00:35, 1331.08 examples/s]"
      ]
     },
     {
@@ -122886,8 +19581,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10733/12318 [18:36:06<2:44:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10733/12318 [18:36:06<2:44:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▊  | 107682/154911 [02:13<00:35, 1326.97 examples/s]"
      ]
     },
     {
@@ -122895,8 +19589,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10734/12318 [18:36:14<2:44:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10734/12318 [18:36:14<2:44:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▊  | 107817/154911 [02:13<00:35, 1330.75 examples/s]"
      ]
     },
     {
@@ -122904,8 +19597,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10735/12318 [18:36:17<2:44:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10735/12318 [18:36:17<2:44:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 107966/154911 [02:13<00:34, 1366.01 examples/s]"
      ]
     },
     {
@@ -122913,8 +19605,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10736/12318 [18:36:22<2:44:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10736/12318 [18:36:22<2:44:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 108104/154911 [02:13<00:35, 1330.53 examples/s]"
      ]
     },
     {
@@ -122922,8 +19613,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10737/12318 [18:36:24<2:44:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10737/12318 [18:36:24<2:44:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 108239/154911 [02:13<00:35, 1306.90 examples/s]"
      ]
     },
     {
@@ -122931,8 +19621,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10738/12318 [18:36:30<2:44:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10738/12318 [18:36:30<2:44:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 108370/154911 [02:13<00:35, 1293.04 examples/s]"
      ]
     },
     {
@@ -122940,8 +19629,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10739/12318 [18:36:32<2:44:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10739/12318 [18:36:32<2:44:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 108500/154911 [02:13<00:37, 1252.98 examples/s]"
      ]
     },
     {
@@ -122949,7 +19637,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10740/12318 [18:36:37<2:44:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 108631/154911 [02:14<00:36, 1261.63 examples/s]"
      ]
     },
     {
@@ -122957,7 +19645,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10740/12318 [18:36:37<2:44:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 108759/154911 [02:14<00:37, 1214.96 examples/s]"
      ]
     },
     {
@@ -122965,8 +19653,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10741/12318 [18:36:45<2:43:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10741/12318 [18:36:45<2:43:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 108881/154911 [02:14<00:38, 1191.52 examples/s]"
      ]
     },
     {
@@ -122974,8 +19661,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10742/12318 [18:36:51<2:43:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10742/12318 [18:36:51<2:43:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 109015/154911 [02:14<00:38, 1203.89 examples/s]"
      ]
     },
     {
@@ -122983,8 +19669,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10743/12318 [18:36:54<2:43:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10743/12318 [18:36:54<2:43:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  70%|████▉  | 109136/154911 [02:14<00:39, 1148.17 examples/s]"
      ]
     },
     {
@@ -122992,8 +19677,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10744/12318 [18:37:01<2:43:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10744/12318 [18:37:01<2:43:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 109254/154911 [02:14<00:39, 1156.98 examples/s]"
      ]
     },
     {
@@ -123001,8 +19685,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10745/12318 [18:37:04<2:43:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10745/12318 [18:37:04<2:43:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 109376/154911 [02:14<00:39, 1164.25 examples/s]"
      ]
     },
     {
@@ -123010,8 +19693,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10746/12318 [18:37:06<2:43:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10746/12318 [18:37:06<2:43:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 109493/154911 [02:14<00:39, 1137.57 examples/s]"
      ]
     },
     {
@@ -123019,8 +19701,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10747/12318 [18:37:11<2:43:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10747/12318 [18:37:11<2:43:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 109609/154911 [02:14<00:39, 1143.18 examples/s]"
      ]
     },
     {
@@ -123028,8 +19709,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10748/12318 [18:37:19<2:43:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10748/12318 [18:37:19<2:43:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 109724/154911 [02:15<00:41, 1099.83 examples/s]"
      ]
     },
     {
@@ -123037,8 +19717,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10749/12318 [18:37:24<2:43:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10749/12318 [18:37:24<2:43:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 109841/154911 [02:15<00:40, 1115.35 examples/s]"
      ]
     },
     {
@@ -123046,8 +19725,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10750/12318 [18:37:33<2:43:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10750/12318 [18:37:33<2:43:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 109960/154911 [02:15<00:39, 1134.53 examples/s]"
      ]
     },
     {
@@ -123055,8 +19733,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10751/12318 [18:37:35<2:42:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10751/12318 [18:37:35<2:42:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 110085/154911 [02:15<00:38, 1158.82 examples/s]"
      ]
     },
     {
@@ -123064,8 +19741,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10752/12318 [18:38:01<2:42:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10752/12318 [18:38:01<2:42:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 110207/154911 [02:15<00:38, 1175.79 examples/s]"
      ]
     },
     {
@@ -123073,8 +19749,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10753/12318 [18:38:10<2:42:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10753/12318 [18:38:10<2:42:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 110326/154911 [02:15<00:38, 1157.99 examples/s]"
      ]
     },
     {
@@ -123082,8 +19757,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10754/12318 [18:38:16<2:42:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10754/12318 [18:38:16<2:42:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 110446/154911 [02:15<00:38, 1166.09 examples/s]"
      ]
     },
     {
@@ -123091,8 +19765,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10755/12318 [18:38:23<2:42:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10755/12318 [18:38:23<2:42:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|████▉  | 110564/154911 [02:15<00:38, 1147.46 examples/s]"
      ]
     },
     {
@@ -123100,8 +19773,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10756/12318 [18:38:26<2:42:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10756/12318 [18:38:26<2:42:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  71%|█████  | 110679/154911 [02:15<00:38, 1139.27 examples/s]"
      ]
     },
     {
@@ -123109,8 +19781,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10757/12318 [18:38:29<2:42:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10757/12318 [18:38:29<2:42:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 110800/154911 [02:15<00:38, 1156.10 examples/s]"
      ]
     },
     {
@@ -123118,8 +19789,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10758/12318 [18:38:35<2:42:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10758/12318 [18:38:35<2:42:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 110919/154911 [02:16<00:37, 1160.03 examples/s]"
      ]
     },
     {
@@ -123127,8 +19797,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10759/12318 [18:38:38<2:42:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10759/12318 [18:38:38<2:42:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 111053/154911 [02:16<00:36, 1206.37 examples/s]"
      ]
     },
     {
@@ -123136,8 +19805,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10760/12318 [18:38:43<2:41:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10760/12318 [18:38:43<2:41:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 111234/154911 [02:16<00:31, 1379.88 examples/s]"
      ]
     },
     {
@@ -123145,8 +19813,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10761/12318 [18:38:50<2:41:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10761/12318 [18:38:50<2:41:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 111386/154911 [02:16<00:30, 1421.06 examples/s]"
      ]
     },
     {
@@ -123154,8 +19821,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10762/12318 [18:38:51<2:41:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10762/12318 [18:38:51<2:41:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 111530/154911 [02:16<00:33, 1291.76 examples/s]"
      ]
     },
     {
@@ -123163,8 +19829,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10763/12318 [18:38:55<2:41:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10763/12318 [18:38:55<2:41:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 111663/154911 [02:16<00:36, 1194.57 examples/s]"
      ]
     },
     {
@@ -123172,8 +19837,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10764/12318 [18:39:03<2:41:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10764/12318 [18:39:03<2:41:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 111786/154911 [02:16<00:36, 1168.04 examples/s]"
      ]
     },
     {
@@ -123181,8 +19845,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10765/12318 [18:39:08<2:41:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10765/12318 [18:39:08<2:41:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 111906/154911 [02:16<00:38, 1111.73 examples/s]"
      ]
     },
     {
@@ -123190,8 +19853,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10766/12318 [18:39:15<2:41:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10766/12318 [18:39:15<2:41:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 112020/154911 [02:16<00:38, 1114.51 examples/s]"
      ]
     },
     {
@@ -123199,8 +19861,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10767/12318 [18:39:18<2:41:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10767/12318 [18:39:18<2:41:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 112134/154911 [02:17<00:39, 1089.07 examples/s]"
      ]
     },
     {
@@ -123208,8 +19869,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10768/12318 [18:39:21<2:41:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10768/12318 [18:39:21<2:41:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  72%|█████  | 112244/154911 [02:17<00:39, 1089.63 examples/s]"
      ]
     },
     {
@@ -123217,8 +19877,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10769/12318 [18:39:30<2:41:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10769/12318 [18:39:30<2:41:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████  | 112369/154911 [02:17<00:37, 1134.04 examples/s]"
      ]
     },
     {
@@ -123226,8 +19885,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10770/12318 [18:39:35<2:40:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10770/12318 [18:39:35<2:40:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████  | 112484/154911 [02:17<00:37, 1138.10 examples/s]"
      ]
     },
     {
@@ -123235,8 +19893,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10771/12318 [18:39:37<2:40:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10771/12318 [18:39:37<2:40:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████  | 112600/154911 [02:17<00:38, 1089.34 examples/s]"
      ]
     },
     {
@@ -123244,8 +19901,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10772/12318 [18:39:41<2:40:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10772/12318 [18:39:41<2:40:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████  | 112711/154911 [02:17<00:39, 1061.23 examples/s]"
      ]
     },
     {
@@ -123253,8 +19909,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10773/12318 [18:39:46<2:40:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10773/12318 [18:39:46<2:40:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████  | 112823/154911 [02:17<00:39, 1070.51 examples/s]"
      ]
     },
     {
@@ -123262,8 +19917,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10774/12318 [18:39:48<2:40:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10774/12318 [18:39:48<2:40:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████  | 112948/154911 [02:17<00:37, 1120.80 examples/s]"
      ]
     },
     {
@@ -123271,8 +19925,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10775/12318 [18:39:53<2:40:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10775/12318 [18:39:53<2:40:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|████��  | 113062/154911 [02:17<00:39, 1062.91 examples/s]"
      ]
     },
     {
@@ -123280,8 +19933,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10776/12318 [18:39:58<2:40:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10776/12318 [18:39:58<2:40:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████  | 113179/154911 [02:18<00:38, 1083.61 examples/s]"
      ]
     },
     {
@@ -123289,8 +19941,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10777/12318 [18:40:03<2:40:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10777/12318 [18:40:03<2:40:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████  | 113291/154911 [02:18<00:39, 1059.68 examples/s]"
      ]
     },
     {
@@ -123298,8 +19949,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  87%|▊| 10778/12318 [18:40:10<2:40:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  87%|▊| 10778/12318 [18:40:10<2:40:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████  | 113403/154911 [02:18<00:39, 1062.30 examples/s]"
      ]
     },
     {
@@ -123307,8 +19957,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10779/12318 [18:40:14<2:39:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10779/12318 [18:40:14<2:39:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████▏ | 113510/154911 [02:18<00:39, 1052.95 examples/s]"
      ]
     },
     {
@@ -123316,8 +19965,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10780/12318 [18:40:21<2:39:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10780/12318 [18:40:21<2:39:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████▏ | 113622/154911 [02:18<00:38, 1070.29 examples/s]"
      ]
     },
     {
@@ -123325,8 +19973,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10781/12318 [18:40:26<2:39:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10781/12318 [18:40:26<2:39:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████▏ | 113730/154911 [02:18<00:38, 1070.62 examples/s]"
      ]
     },
     {
@@ -123334,8 +19981,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10782/12318 [18:40:35<2:39:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10782/12318 [18:40:35<2:39:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  73%|█████▏ | 113843/154911 [02:18<00:38, 1068.03 examples/s]"
      ]
     },
     {
@@ -123343,8 +19989,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10783/12318 [18:40:38<2:39:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10783/12318 [18:40:38<2:39:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 113962/154911 [02:18<00:37, 1093.08 examples/s]"
      ]
     },
     {
@@ -123352,8 +19997,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10784/12318 [18:41:25<2:39:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10784/12318 [18:41:25<2:39:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 114089/154911 [02:18<00:35, 1142.99 examples/s]"
      ]
     },
     {
@@ -123361,8 +20005,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10785/12318 [18:41:34<2:39:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10785/12318 [18:41:34<2:39:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 114227/154911 [02:18<00:33, 1211.77 examples/s]"
      ]
     },
     {
@@ -123370,8 +20013,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10786/12318 [18:41:35<2:39:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10786/12318 [18:41:35<2:39:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 114398/154911 [02:19<00:29, 1356.88 examples/s]"
      ]
     },
     {
@@ -123379,8 +20021,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10787/12318 [18:41:40<2:39:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10787/12318 [18:41:40<2:39:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 114603/154911 [02:19<00:26, 1548.41 examples/s]"
      ]
     },
     {
@@ -123388,8 +20029,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10788/12318 [18:41:45<2:39:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10788/12318 [18:41:45<2:39:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 114760/154911 [02:19<00:27, 1465.26 examples/s]"
      ]
     },
     {
@@ -123397,8 +20037,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10789/12318 [18:41:50<2:38:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10789/12318 [18:41:50<2:38:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 114908/154911 [02:19<00:28, 1403.09 examples/s]"
      ]
     },
     {
@@ -123406,8 +20045,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10790/12318 [18:41:54<2:38:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10790/12318 [18:41:54<2:38:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 115050/154911 [02:19<00:30, 1319.16 examples/s]"
      ]
     },
     {
@@ -123415,8 +20053,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10791/12318 [18:41:57<2:38:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10791/12318 [18:41:57<2:38:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 115185/154911 [02:19<00:30, 1295.67 examples/s]"
      ]
     },
     {
@@ -123424,8 +20061,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10792/12318 [18:42:00<2:38:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10792/12318 [18:42:00<2:38:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  74%|█████▏ | 115317/154911 [02:19<00:31, 1256.90 examples/s]"
      ]
     },
     {
@@ -123433,8 +20069,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10793/12318 [18:42:09<2:38:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10793/12318 [18:42:09<2:38:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▏ | 115444/154911 [02:19<00:32, 1219.80 examples/s]"
      ]
     },
     {
@@ -123442,8 +20077,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10794/12318 [18:42:12<2:38:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10794/12318 [18:42:12<2:38:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▏ | 115567/154911 [02:19<00:33, 1178.59 examples/s]"
      ]
     },
     {
@@ -123451,8 +20085,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10795/12318 [18:42:18<2:38:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10795/12318 [18:42:18<2:38:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▏ | 115692/154911 [02:20<00:33, 1182.21 examples/s]"
      ]
     },
     {
@@ -123460,8 +20093,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10796/12318 [18:42:24<2:38:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10796/12318 [18:42:24<2:38:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▏ | 115811/154911 [02:20<00:33, 1156.96 examples/s]"
      ]
     },
     {
@@ -123469,8 +20101,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10797/12318 [18:42:30<2:38:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10797/12318 [18:42:30<2:38:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▏ | 115932/154911 [02:20<00:33, 1169.84 examples/s]"
      ]
     },
     {
@@ -123478,8 +20109,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10798/12318 [18:42:34<2:38:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10798/12318 [18:42:34<2:38:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▏ | 116050/154911 [02:20<00:33, 1160.71 examples/s]"
      ]
     },
     {
@@ -123487,8 +20117,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10799/12318 [18:42:40<2:37:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10799/12318 [18:42:40<2:37:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▏ | 116167/154911 [02:20<00:33, 1160.64 examples/s]"
      ]
     },
     {
@@ -123496,8 +20125,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10800/12318 [18:42:45<2:37:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10800/12318 [18:42:45<2:37:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▎ | 116287/154911 [02:20<00:33, 1169.63 examples/s]"
      ]
     },
     {
@@ -123505,8 +20133,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10801/12318 [18:42:50<2:37:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10801/12318 [18:42:50<2:37:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▎ | 116407/154911 [02:20<00:33, 1153.51 examples/s]"
      ]
     },
     {
@@ -123514,8 +20141,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10802/12318 [18:42:54<2:37:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10802/12318 [18:42:54<2:37:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▎ | 116524/154911 [02:20<00:33, 1152.89 examples/s]"
      ]
     },
     {
@@ -123523,8 +20149,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10803/12318 [18:43:00<2:37:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10803/12318 [18:43:00<2:37:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▎ | 116646/154911 [02:20<00:32, 1172.48 examples/s]"
      ]
     },
     {
@@ -123532,8 +20157,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10804/12318 [18:43:05<2:37:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10804/12318 [18:43:05<2:37:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▎ | 116764/154911 [02:21<00:32, 1163.29 examples/s]"
      ]
     },
     {
@@ -123541,7 +20165,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10805/12318 [18:43:10<2:37:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  75%|█████▎ | 116895/154911 [02:21<00:31, 1194.30 examples/s]"
      ]
     },
     {
@@ -123549,7 +20173,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10805/12318 [18:43:10<2:37:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 117015/154911 [02:21<00:33, 1136.92 examples/s]"
      ]
     },
     {
@@ -123557,8 +20181,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10806/12318 [18:43:12<2:37:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10806/12318 [18:43:12<2:37:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 117154/154911 [02:21<00:31, 1205.82 examples/s]"
      ]
     },
     {
@@ -123566,8 +20189,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10807/12318 [18:43:16<2:37:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10807/12318 [18:43:16<2:37:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 117276/154911 [02:21<00:32, 1158.84 examples/s]"
      ]
     },
     {
@@ -123575,7 +20197,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10808/12318 [18:43:21<2:36:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 117408/154911 [02:21<00:31, 1204.20 examples/s]"
      ]
     },
     {
@@ -123583,7 +20205,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10808/12318 [18:43:21<2:36:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 117532/154911 [02:21<00:30, 1212.31 examples/s]"
      ]
     },
     {
@@ -123591,8 +20213,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10809/12318 [18:43:27<2:36:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10809/12318 [18:43:27<2:36:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 117659/154911 [02:21<00:30, 1228.92 examples/s]"
      ]
     },
     {
@@ -123600,8 +20221,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10810/12318 [18:43:34<2:36:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10810/12318 [18:43:34<2:36:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 117791/154911 [02:21<00:29, 1255.16 examples/s]"
      ]
     },
     {
@@ -123609,8 +20229,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10811/12318 [18:43:38<2:36:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10811/12318 [18:43:38<2:36:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 117918/154911 [02:21<00:29, 1245.76 examples/s]"
      ]
     },
     {
@@ -123618,8 +20237,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10812/12318 [18:43:39<2:36:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10812/12318 [18:43:39<2:36:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 118044/154911 [02:22<00:29, 1241.61 examples/s]"
      ]
     },
     {
@@ -123627,8 +20245,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10813/12318 [18:43:48<2:36:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10813/12318 [18:43:48<2:36:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 118174/154911 [02:22<00:29, 1256.48 examples/s]"
      ]
     },
     {
@@ -123636,8 +20253,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10814/12318 [18:43:52<2:36:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10814/12318 [18:43:52<2:36:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 118301/154911 [02:22<00:29, 1258.85 examples/s]"
      ]
     },
     {
@@ -123645,8 +20261,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10815/12318 [18:43:57<2:36:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10815/12318 [18:43:57<2:36:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  76%|█████▎ | 118428/154911 [02:22<00:29, 1252.66 examples/s]"
      ]
     },
     {
@@ -123654,8 +20269,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10816/12318 [18:44:49<2:36:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10816/12318 [18:44:49<2:36:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▎ | 118554/154911 [02:22<00:29, 1232.36 examples/s]"
      ]
     },
     {
@@ -123663,8 +20277,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10817/12318 [18:44:57<2:36:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10817/12318 [18:44:57<2:36:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▎ | 118688/154911 [02:22<00:28, 1261.31 examples/s]"
      ]
     },
     {
@@ -123672,8 +20285,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10818/12318 [18:45:06<2:36:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10818/12318 [18:45:06<2:36:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▎ | 118815/154911 [02:22<00:29, 1225.65 examples/s]"
      ]
     },
     {
@@ -123681,8 +20293,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10819/12318 [18:45:09<2:35:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10819/12318 [18:45:09<2:35:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▎ | 118939/154911 [02:22<00:30, 1196.92 examples/s]"
      ]
     },
     {
@@ -123690,8 +20301,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10820/12318 [18:45:18<2:35:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10820/12318 [18:45:18<2:35:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▍ | 119061/154911 [02:22<00:29, 1201.64 examples/s]"
      ]
     },
     {
@@ -123699,8 +20309,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10821/12318 [18:45:25<2:35:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10821/12318 [18:45:25<2:35:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▍ | 119183/154911 [02:22<00:29, 1193.77 examples/s]"
      ]
     },
     {
@@ -123708,8 +20317,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10822/12318 [18:45:29<2:35:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10822/12318 [18:45:29<2:35:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▍ | 119303/154911 [02:23<00:30, 1179.09 examples/s]"
      ]
     },
     {
@@ -123717,8 +20325,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10823/12318 [18:45:30<2:35:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10823/12318 [18:45:30<2:35:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▍ | 119426/154911 [02:23<00:29, 1186.08 examples/s]"
      ]
     },
     {
@@ -123726,8 +20333,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10824/12318 [18:45:36<2:35:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10824/12318 [18:45:36<2:35:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▍ | 119549/154911 [02:23<00:29, 1194.99 examples/s]"
      ]
     },
     {
@@ -123735,8 +20341,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10825/12318 [18:45:39<2:35:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10825/12318 [18:45:39<2:35:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▍ | 119687/154911 [02:23<00:28, 1237.66 examples/s]"
      ]
     },
     {
@@ -123744,8 +20349,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10826/12318 [18:45:46<2:35:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10826/12318 [18:45:46<2:35:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▍ | 119825/154911 [02:23<00:27, 1278.02 examples/s]"
      ]
     },
     {
@@ -123753,8 +20357,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10827/12318 [18:45:47<2:35:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10827/12318 [18:45:47<2:35:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  77%|█████▍ | 119984/154911 [02:23<00:25, 1369.34 examples/s]"
      ]
     },
     {
@@ -123762,8 +20365,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10828/12318 [18:45:56<2:34:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10828/12318 [18:45:56<2:34:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 120181/154911 [02:23<00:22, 1545.64 examples/s]"
      ]
     },
     {
@@ -123771,8 +20373,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10829/12318 [18:46:04<2:34:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10829/12318 [18:46:04<2:34:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 120336/154911 [02:23<00:25, 1368.80 examples/s]"
      ]
     },
     {
@@ -123780,8 +20381,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10830/12318 [18:46:10<2:34:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10830/12318 [18:46:10<2:34:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 120477/154911 [02:23<00:26, 1305.62 examples/s]"
      ]
     },
     {
@@ -123789,8 +20389,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10831/12318 [18:46:19<2:34:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10831/12318 [18:46:19<2:34:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 120612/154911 [02:24<00:27, 1243.14 examples/s]"
      ]
     },
     {
@@ -123798,8 +20397,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10832/12318 [18:46:26<2:34:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10832/12318 [18:46:26<2:34:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 120747/154911 [02:24<00:27, 1264.50 examples/s]"
      ]
     },
     {
@@ -123807,8 +20405,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10833/12318 [18:46:32<2:34:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10833/12318 [18:46:32<2:34:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 120876/154911 [02:24<00:28, 1196.11 examples/s]"
      ]
     },
     {
@@ -123816,8 +20413,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10834/12318 [18:46:36<2:34:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10834/12318 [18:46:36<2:34:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 120998/154911 [02:24<00:29, 1158.80 examples/s]"
      ]
     },
     {
@@ -123825,8 +20421,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10835/12318 [18:46:44<2:34:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10835/12318 [18:46:44<2:34:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 121124/154911 [02:24<00:29, 1155.41 examples/s]"
      ]
     },
     {
@@ -123834,7 +20429,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10836/12318 [18:46:47<2:34:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 121248/154911 [02:24<00:28, 1177.18 examples/s]"
      ]
     },
     {
@@ -123842,7 +20437,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10836/12318 [18:46:47<2:34:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 121369/154911 [02:24<00:28, 1164.43 examples/s]"
      ]
     },
     {
@@ -123850,8 +20445,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10837/12318 [18:46:52<2:34:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10837/12318 [18:46:52<2:34:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 121487/154911 [02:24<00:29, 1129.35 examples/s]"
      ]
     },
     {
@@ -123859,8 +20453,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10838/12318 [18:46:59<2:33:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10838/12318 [18:46:59<2:33:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  78%|█████▍ | 121605/154911 [02:24<00:29, 1136.18 examples/s]"
      ]
     },
     {
@@ -123868,8 +20461,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10839/12318 [18:47:05<2:33:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10839/12318 [18:47:05<2:33:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 121720/154911 [02:25<00:29, 1109.33 examples/s]"
      ]
     },
     {
@@ -123877,8 +20469,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10840/12318 [18:47:12<2:33:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10840/12318 [18:47:12<2:33:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 121832/154911 [02:25<00:32, 1022.92 examples/s]"
      ]
     },
     {
@@ -123886,8 +20477,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10841/12318 [18:47:16<2:33:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10841/12318 [18:47:16<2:33:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 121965/154911 [02:25<00:29, 1099.47 examples/s]"
      ]
     },
     {
@@ -123895,8 +20485,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10842/12318 [18:47:23<2:33:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10842/12318 [18:47:23<2:33:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 122084/154911 [02:25<00:29, 1108.64 examples/s]"
      ]
     },
     {
@@ -123904,8 +20493,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10843/12318 [18:47:32<2:33:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10843/12318 [18:47:32<2:33:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 122197/154911 [02:25<00:29, 1101.72 examples/s]"
      ]
     },
     {
@@ -123913,8 +20501,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10844/12318 [18:47:35<2:33:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10844/12318 [18:47:35<2:33:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 122309/154911 [02:25<00:29, 1104.41 examples/s]"
      ]
     },
     {
@@ -123922,8 +20509,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10845/12318 [18:47:40<2:33:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10845/12318 [18:47:40<2:33:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 122421/154911 [02:25<00:29, 1106.98 examples/s]"
      ]
     },
     {
@@ -123931,8 +20517,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10846/12318 [18:47:41<2:33:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10846/12318 [18:47:41<2:33:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 122534/154911 [02:25<00:29, 1095.59 examples/s]"
      ]
     },
     {
@@ -123940,8 +20525,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10847/12318 [18:47:47<2:32:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10847/12318 [18:47:47<2:32:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 122644/154911 [02:25<00:29, 1080.71 examples/s]"
      ]
     },
     {
@@ -123949,8 +20533,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10848/12318 [18:48:07<2:32:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10848/12318 [18:48:07<2:32:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 122760/154911 [02:26<00:29, 1098.63 examples/s]"
      ]
     },
     {
@@ -123958,8 +20541,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10849/12318 [18:48:11<2:32:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10849/12318 [18:48:11<2:32:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 122871/154911 [02:26<00:29, 1095.51 examples/s]"
      ]
     },
     {
@@ -123967,8 +20549,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10850/12318 [18:48:17<2:32:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10850/12318 [18:48:17<2:32:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  79%|█████▌ | 122993/154911 [02:26<00:28, 1131.00 examples/s]"
      ]
     },
     {
@@ -123976,8 +20557,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10851/12318 [18:48:25<2:32:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10851/12318 [18:48:25<2:32:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▌ | 123181/154911 [02:26<00:23, 1349.11 examples/s]"
      ]
     },
     {
@@ -123985,8 +20565,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10852/12318 [18:48:33<2:32:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10852/12318 [18:48:33<2:32:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▌ | 123399/154911 [02:26<00:19, 1592.72 examples/s]"
      ]
     },
     {
@@ -123994,8 +20573,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10853/12318 [18:48:36<2:32:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10853/12318 [18:48:36<2:32:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▌ | 123559/154911 [02:26<00:20, 1505.08 examples/s]"
      ]
     },
     {
@@ -124003,8 +20581,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10854/12318 [18:48:37<2:32:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10854/12318 [18:48:37<2:32:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▌ | 123724/154911 [02:26<00:20, 1542.30 examples/s]"
      ]
     },
     {
@@ -124012,8 +20589,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10855/12318 [18:48:41<2:32:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10855/12318 [18:48:41<2:32:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▌ | 123881/154911 [02:26<00:20, 1536.39 examples/s]"
      ]
     },
     {
@@ -124021,8 +20597,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10856/12318 [18:48:49<2:32:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10856/12318 [18:48:49<2:32:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▌ | 124037/154911 [02:26<00:21, 1455.22 examples/s]"
      ]
     },
     {
@@ -124030,8 +20605,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10857/12318 [18:48:54<2:31:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10857/12318 [18:48:54<2:31:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▌ | 124184/154911 [02:26<00:22, 1376.05 examples/s]"
      ]
     },
     {
@@ -124039,8 +20613,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10858/12318 [18:48:57<2:31:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10858/12318 [18:48:57<2:31:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▌ | 124324/154911 [02:27<00:22, 1336.38 examples/s]"
      ]
     },
     {
@@ -124048,8 +20621,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10859/12318 [18:49:01<2:31:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10859/12318 [18:49:01<2:31:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▌ | 124460/154911 [02:27<00:22, 1325.33 examples/s]"
      ]
     },
     {
@@ -124057,8 +20629,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10860/12318 [18:49:06<2:31:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10860/12318 [18:49:06<2:31:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  80%|█████▋ | 124595/154911 [02:27<00:23, 1289.30 examples/s]"
      ]
     },
     {
@@ -124066,8 +20637,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10861/12318 [18:49:11<2:31:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10861/12318 [18:49:11<2:31:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 124725/154911 [02:27<00:23, 1265.88 examples/s]"
      ]
     },
     {
@@ -124075,8 +20645,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10862/12318 [18:49:17<2:31:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10862/12318 [18:49:17<2:31:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 124852/154911 [02:27<00:24, 1247.50 examples/s]"
      ]
     },
     {
@@ -124084,8 +20653,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10863/12318 [18:49:23<2:31:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10863/12318 [18:49:23<2:31:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 124980/154911 [02:27<00:24, 1246.00 examples/s]"
      ]
     },
     {
@@ -124093,8 +20661,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10864/12318 [18:49:27<2:31:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10864/12318 [18:49:27<2:31:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 125105/154911 [02:27<00:24, 1238.17 examples/s]"
      ]
     },
     {
@@ -124102,8 +20669,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10865/12318 [18:49:30<2:31:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10865/12318 [18:49:30<2:31:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 125230/154911 [02:27<00:24, 1211.69 examples/s]"
      ]
     },
     {
@@ -124111,8 +20677,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10866/12318 [18:49:34<2:30:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10866/12318 [18:49:34<2:30:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 125353/154911 [02:27<00:25, 1169.93 examples/s]"
      ]
     },
     {
@@ -124120,8 +20685,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10867/12318 [18:49:38<2:30:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10867/12318 [18:49:38<2:30:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 125472/154911 [02:28<00:25, 1173.81 examples/s]"
      ]
     },
     {
@@ -124129,8 +20693,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10868/12318 [18:49:44<2:30:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10868/12318 [18:49:44<2:30:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 125624/154911 [02:28<00:23, 1269.86 examples/s]"
      ]
     },
     {
@@ -124138,8 +20701,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10869/12318 [18:49:47<2:30:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10869/12318 [18:49:47<2:30:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 125771/154911 [02:28<00:21, 1325.72 examples/s]"
      ]
     },
     {
@@ -124147,8 +20709,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10870/12318 [18:49:53<2:30:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10870/12318 [18:49:53<2:30:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 125906/154911 [02:28<00:23, 1254.33 examples/s]"
      ]
     },
     {
@@ -124156,8 +20717,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10871/12318 [18:50:01<2:30:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10871/12318 [18:50:01<2:30:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 126033/154911 [02:28<00:23, 1213.99 examples/s]"
      ]
     },
     {
@@ -124165,8 +20725,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10872/12318 [18:50:08<2:30:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10872/12318 [18:50:08<2:30:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  81%|█████▋ | 126156/154911 [02:28<00:23, 1200.49 examples/s]"
      ]
     },
     {
@@ -124174,8 +20733,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10873/12318 [18:50:17<2:30:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10873/12318 [18:50:17<2:30:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▋ | 126278/154911 [02:28<00:24, 1177.53 examples/s]"
      ]
     },
     {
@@ -124183,8 +20741,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10874/12318 [18:50:23<2:30:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10874/12318 [18:50:23<2:30:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▋ | 126399/154911 [02:28<00:25, 1139.04 examples/s]"
      ]
     },
     {
@@ -124192,8 +20749,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10875/12318 [18:50:30<2:30:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10875/12318 [18:50:30<2:30:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▋ | 126515/154911 [02:28<00:25, 1112.24 examples/s]"
      ]
     },
     {
@@ -124201,8 +20757,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10876/12318 [18:50:35<2:29:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10876/12318 [18:50:35<2:29:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▋ | 126638/154911 [02:29<00:24, 1134.76 examples/s]"
      ]
     },
     {
@@ -124210,8 +20765,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10877/12318 [18:50:38<2:29:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10877/12318 [18:50:38<2:29:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▋ | 126754/154911 [02:29<00:25, 1122.07 examples/s]"
      ]
     },
     {
@@ -124219,8 +20773,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10878/12318 [18:50:45<2:29:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10878/12318 [18:50:45<2:29:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▋ | 126875/154911 [02:29<00:24, 1137.69 examples/s]"
      ]
     },
     {
@@ -124228,8 +20781,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10879/12318 [18:50:49<2:29:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10879/12318 [18:50:49<2:29:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▋ | 126990/154911 [02:29<00:25, 1102.53 examples/s]"
      ]
     },
     {
@@ -124237,8 +20789,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10880/12318 [18:51:20<2:29:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10880/12318 [18:51:20<2:29:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▋ | 127101/154911 [02:29<00:25, 1094.54 examples/s]"
      ]
     },
     {
@@ -124246,8 +20797,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10881/12318 [18:51:26<2:29:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10881/12318 [18:51:26<2:29:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▋ | 127215/154911 [02:29<00:25, 1105.96 examples/s]"
      ]
     },
     {
@@ -124255,8 +20805,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10882/12318 [18:51:35<2:29:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10882/12318 [18:51:35<2:29:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▊ | 127332/154911 [02:29<00:24, 1115.30 examples/s]"
      ]
     },
     {
@@ -124264,8 +20813,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10883/12318 [18:51:42<2:29:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10883/12318 [18:51:42<2:29:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▊ | 127444/154911 [02:29<00:24, 1101.91 examples/s]"
      ]
     },
     {
@@ -124273,8 +20821,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10884/12318 [18:51:47<2:29:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10884/12318 [18:51:47<2:29:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▊ | 127555/154911 [02:29<00:25, 1089.29 examples/s]"
      ]
     },
     {
@@ -124282,8 +20829,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10885/12318 [18:51:54<2:29:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10885/12318 [18:51:54<2:29:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▊ | 127665/154911 [02:29<00:25, 1073.45 examples/s]"
      ]
     },
     {
@@ -124291,8 +20837,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10886/12318 [18:51:55<2:28:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10886/12318 [18:51:55<2:28:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  82%|█████▊ | 127773/154911 [02:30<00:25, 1050.26 examples/s]"
      ]
     },
     {
@@ -124300,8 +20845,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10887/12318 [18:51:58<2:28:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10887/12318 [18:51:58<2:28:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 127891/154911 [02:30<00:25, 1078.04 examples/s]"
      ]
     },
     {
@@ -124309,8 +20853,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10888/12318 [18:52:02<2:28:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10888/12318 [18:52:02<2:28:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 128005/154911 [02:30<00:24, 1095.40 examples/s]"
      ]
     },
     {
@@ -124318,8 +20861,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10889/12318 [18:52:03<2:28:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10889/12318 [18:52:03<2:28:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 128122/154911 [02:30<00:24, 1109.38 examples/s]"
      ]
     },
     {
@@ -124327,8 +20869,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10890/12318 [18:52:08<2:28:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10890/12318 [18:52:08<2:28:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 128237/154911 [02:30<00:23, 1120.19 examples/s]"
      ]
     },
     {
@@ -124336,8 +20877,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10891/12318 [18:52:12<2:28:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10891/12318 [18:52:12<2:28:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 128350/154911 [02:30<00:23, 1113.29 examples/s]"
      ]
     },
     {
@@ -124345,8 +20885,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10892/12318 [18:52:15<2:28:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10892/12318 [18:52:15<2:28:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 128462/154911 [02:30<00:23, 1102.85 examples/s]"
      ]
     },
     {
@@ -124354,8 +20893,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10893/12318 [18:52:19<2:28:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10893/12318 [18:52:19<2:28:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 128573/154911 [02:30<00:23, 1102.12 examples/s]"
      ]
     },
     {
@@ -124363,8 +20901,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10894/12318 [18:52:23<2:28:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10894/12318 [18:52:23<2:28:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 128697/154911 [02:30<00:22, 1141.21 examples/s]"
      ]
     },
     {
@@ -124372,8 +20909,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10895/12318 [18:52:27<2:27:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10895/12318 [18:52:27<2:27:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 128817/154911 [02:31<00:22, 1158.60 examples/s]"
      ]
     },
     {
@@ -124381,8 +20917,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10896/12318 [18:52:34<2:27:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10896/12318 [18:52:34<2:27:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 128964/154911 [02:31<00:20, 1241.48 examples/s]"
      ]
     },
     {
@@ -124390,8 +20925,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10897/12318 [18:52:42<2:27:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10897/12318 [18:52:42<2:27:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 129099/154911 [02:31<00:20, 1266.27 examples/s]"
      ]
     },
     {
@@ -124399,8 +20933,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10898/12318 [18:52:47<2:27:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10898/12318 [18:52:47<2:27:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 129226/154911 [02:31<00:21, 1212.25 examples/s]"
      ]
     },
     {
@@ -124408,8 +20941,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10899/12318 [18:52:51<2:27:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10899/12318 [18:52:51<2:27:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  83%|█████▊ | 129348/154911 [02:31<00:22, 1151.84 examples/s]"
      ]
     },
     {
@@ -124417,8 +20949,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10900/12318 [18:52:56<2:27:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10900/12318 [18:52:56<2:27:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|█████▊ | 129464/154911 [02:31<00:22, 1106.75 examples/s]"
      ]
     },
     {
@@ -124426,8 +20957,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  88%|▉| 10901/12318 [18:53:04<2:27:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  88%|▉| 10901/12318 [18:53:04<2:27:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|█████▊ | 129576/154911 [02:31<00:23, 1073.84 examples/s]"
      ]
     },
     {
@@ -124435,8 +20965,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10902/12318 [18:53:11<2:27:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10902/12318 [18:53:11<2:27:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|█████▊ | 129684/154911 [02:31<00:23, 1063.40 examples/s]"
      ]
     },
     {
@@ -124444,8 +20973,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10903/12318 [18:53:20<2:27:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10903/12318 [18:53:20<2:27:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|█████▊ | 129793/154911 [02:31<00:23, 1060.25 examples/s]"
      ]
     },
     {
@@ -124453,8 +20981,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10904/12318 [18:53:28<2:26:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10904/12318 [18:53:28<2:26:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|█████▊ | 129901/154911 [02:31<00:23, 1046.01 examples/s]"
      ]
     },
     {
@@ -124462,8 +20989,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10905/12318 [18:53:30<2:26:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10905/12318 [18:53:30<2:26:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|█████▊ | 130006/154911 [02:32<00:23, 1042.02 examples/s]"
      ]
     },
     {
@@ -124471,8 +20997,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10906/12318 [18:53:36<2:26:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10906/12318 [18:53:36<2:26:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|█████▉ | 130112/154911 [02:32<00:24, 1015.90 examples/s]"
      ]
     },
     {
@@ -124480,8 +21005,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10907/12318 [18:53:38<2:26:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10907/12318 [18:53:38<2:26:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|██████▋ | 130214/154911 [02:32<00:25, 955.14 examples/s]"
      ]
     },
     {
@@ -124489,8 +21013,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10908/12318 [18:53:43<2:26:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10908/12318 [18:53:43<2:26:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|██████▋ | 130321/154911 [02:32<00:24, 985.13 examples/s]"
      ]
     },
     {
@@ -124498,8 +21021,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10909/12318 [18:53:45<2:26:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10909/12318 [18:53:45<2:26:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|██████▋ | 130421/154911 [02:32<00:24, 980.23 examples/s]"
      ]
     },
     {
@@ -124507,8 +21029,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10910/12318 [18:53:50<2:26:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10910/12318 [18:53:50<2:26:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|██████▋ | 130520/154911 [02:32<00:25, 973.48 examples/s]"
      ]
     },
     {
@@ -124516,8 +21037,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10911/12318 [18:53:56<2:26:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10911/12318 [18:53:56<2:26:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|██████▋ | 130626/154911 [02:32<00:24, 995.66 examples/s]"
      ]
     },
     {
@@ -124525,8 +21045,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10912/12318 [18:54:37<2:26:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10912/12318 [18:54:37<2:26:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|██████▊ | 130726/154911 [02:32<00:25, 937.18 examples/s]"
      ]
     },
     {
@@ -124534,8 +21053,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10913/12318 [18:54:45<2:26:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10913/12318 [18:54:45<2:26:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  84%|██████▊ | 130841/154911 [02:32<00:24, 996.12 examples/s]"
      ]
     },
     {
@@ -124543,8 +21061,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10914/12318 [18:54:49<2:25:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10914/12318 [18:54:49<2:25:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 130949/154911 [02:33<00:23, 1017.83 examples/s]"
      ]
     },
     {
@@ -124552,8 +21069,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10915/12318 [18:54:58<2:25:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10915/12318 [18:54:58<2:25:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 131053/154911 [02:33<00:23, 1008.03 examples/s]"
      ]
     },
     {
@@ -124561,8 +21077,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10916/12318 [18:55:04<2:25:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10916/12318 [18:55:04<2:25:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|██████▊ | 131156/154911 [02:33<00:23, 992.67 examples/s]"
      ]
     },
     {
@@ -124570,8 +21085,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10917/12318 [18:55:13<2:25:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10917/12318 [18:55:13<2:25:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 131264/154911 [02:33<00:23, 1011.66 examples/s]"
      ]
     },
     {
@@ -124579,8 +21093,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10918/12318 [18:55:21<2:25:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10918/12318 [18:55:21<2:25:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 131368/154911 [02:33<00:23, 1000.14 examples/s]"
      ]
     },
     {
@@ -124588,8 +21101,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10919/12318 [18:55:26<2:25:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10919/12318 [18:55:26<2:25:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 131478/154911 [02:33<00:22, 1025.61 examples/s]"
      ]
     },
     {
@@ -124597,8 +21109,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10920/12318 [18:55:31<2:25:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10920/12318 [18:55:31<2:25:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 131583/154911 [02:33<00:22, 1017.78 examples/s]"
      ]
     },
     {
@@ -124606,8 +21117,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10921/12318 [18:55:36<2:25:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10921/12318 [18:55:36<2:25:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|██████▊ | 131687/154911 [02:33<00:23, 995.55 examples/s]"
      ]
     },
     {
@@ -124615,8 +21125,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10922/12318 [18:55:41<2:25:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10922/12318 [18:55:41<2:25:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 131797/154911 [02:33<00:22, 1024.95 examples/s]"
      ]
     },
     {
@@ -124624,8 +21133,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10923/12318 [18:55:43<2:25:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10923/12318 [18:55:43<2:25:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 131901/154911 [02:33<00:22, 1019.05 examples/s]"
      ]
     },
     {
@@ -124633,8 +21141,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10924/12318 [18:55:49<2:24:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10924/12318 [18:55:49<2:24:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 132004/154911 [02:34<00:22, 1016.71 examples/s]"
      ]
     },
     {
@@ -124642,8 +21149,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10925/12318 [18:55:54<2:24:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10925/12318 [18:55:54<2:24:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 132111/154911 [02:34<00:22, 1030.85 examples/s]"
      ]
     },
     {
@@ -124651,7 +21157,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10926/12318 [18:55:57<2:24:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 132215/154911 [02:34<00:22, 1000.41 examples/s]"
      ]
     },
     {
@@ -124659,7 +21165,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10926/12318 [18:55:57<2:24:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|█████▉ | 132318/154911 [02:34<00:22, 1000.24 examples/s]"
      ]
     },
     {
@@ -124667,8 +21173,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10927/12318 [18:56:01<2:24:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10927/12318 [18:56:01<2:24:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  85%|██████▊ | 132421/154911 [02:34<00:22, 995.74 examples/s]"
      ]
     },
     {
@@ -124676,8 +21181,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10928/12318 [18:56:05<2:24:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10928/12318 [18:56:05<2:24:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▊ | 132522/154911 [02:34<00:22, 984.98 examples/s]"
      ]
     },
     {
@@ -124685,8 +21189,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10929/12318 [18:56:07<2:24:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10929/12318 [18:56:07<2:24:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|█████▉ | 132629/154911 [02:34<00:22, 1009.27 examples/s]"
      ]
     },
     {
@@ -124694,8 +21197,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10930/12318 [18:56:13<2:24:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10930/12318 [18:56:13<2:24:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▊ | 132732/154911 [02:34<00:22, 986.55 examples/s]"
      ]
     },
     {
@@ -124703,8 +21205,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10931/12318 [18:56:16<2:24:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10931/12318 [18:56:16<2:24:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▊ | 132834/154911 [02:34<00:22, 992.73 examples/s]"
      ]
     },
     {
@@ -124712,8 +21213,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10932/12318 [18:56:22<2:24:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10932/12318 [18:56:22<2:24:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▊ | 132934/154911 [02:35<00:22, 994.24 examples/s]"
      ]
     },
     {
@@ -124721,8 +21221,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10933/12318 [18:56:26<2:23:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10933/12318 [18:56:26<2:23:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▊ | 133036/154911 [02:35<00:22, 968.88 examples/s]"
      ]
     },
     {
@@ -124730,8 +21229,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10934/12318 [18:56:32<2:23:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10934/12318 [18:56:32<2:23:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▉ | 133134/154911 [02:35<00:22, 970.22 examples/s]"
      ]
     },
     {
@@ -124739,8 +21237,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10935/12318 [18:56:36<2:23:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10935/12318 [18:56:36<2:23:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▉ | 133233/154911 [02:35<00:22, 968.39 examples/s]"
      ]
     },
     {
@@ -124748,8 +21245,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10936/12318 [18:56:40<2:23:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10936/12318 [18:56:40<2:23:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▉ | 133332/154911 [02:35<00:22, 974.40 examples/s]"
      ]
     },
     {
@@ -124757,8 +21253,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10937/12318 [18:56:45<2:23:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10937/12318 [18:56:45<2:23:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▉ | 133431/154911 [02:35<00:23, 929.27 examples/s]"
      ]
     },
     {
@@ -124766,8 +21261,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10938/12318 [18:56:52<2:23:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10938/12318 [18:56:52<2:23:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▉ | 133533/154911 [02:35<00:22, 954.82 examples/s]"
      ]
     },
     {
@@ -124775,8 +21269,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10939/12318 [18:56:57<2:23:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10939/12318 [18:56:57<2:23:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▉ | 133629/154911 [02:35<00:22, 932.02 examples/s]"
      ]
     },
     {
@@ -124784,8 +21277,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10940/12318 [18:57:05<2:23:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10940/12318 [18:57:05<2:23:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▉ | 133723/154911 [02:35<00:23, 903.60 examples/s]"
      ]
     },
     {
@@ -124793,8 +21285,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10941/12318 [18:57:13<2:23:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10941/12318 [18:57:13<2:23:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▉ | 133815/154911 [02:35<00:23, 885.75 examples/s]"
      ]
     },
     {
@@ -124802,8 +21293,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10942/12318 [18:57:16<2:23:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10942/12318 [18:57:16<2:23:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  86%|██████▉ | 133915/154911 [02:36<00:23, 907.96 examples/s]"
      ]
     },
     {
@@ -124811,8 +21301,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10943/12318 [18:57:18<2:22:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10943/12318 [18:57:18<2:22:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134008/154911 [02:36<00:23, 898.57 examples/s]"
      ]
     },
     {
@@ -124820,8 +21309,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10944/12318 [18:58:11<2:22:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10944/12318 [18:58:11<2:22:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134100/154911 [02:36<00:23, 890.36 examples/s]"
      ]
     },
     {
@@ -124829,8 +21317,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10945/12318 [18:58:19<2:22:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10945/12318 [18:58:19<2:22:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134208/154911 [02:36<00:22, 927.31 examples/s]"
      ]
     },
     {
@@ -124838,8 +21325,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10946/12318 [18:58:22<2:22:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10946/12318 [18:58:22<2:22:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134301/154911 [02:36<00:22, 927.48 examples/s]"
      ]
     },
     {
@@ -124847,8 +21333,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10947/12318 [18:58:27<2:22:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10947/12318 [18:58:27<2:22:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134398/154911 [02:36<00:22, 926.72 examples/s]"
      ]
     },
     {
@@ -124856,8 +21341,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10948/12318 [18:58:33<2:22:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10948/12318 [18:58:33<2:22:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134492/154911 [02:36<00:22, 908.38 examples/s]"
      ]
     },
     {
@@ -124865,8 +21349,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10949/12318 [18:58:37<2:22:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10949/12318 [18:58:37<2:22:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134584/154911 [02:36<00:22, 891.38 examples/s]"
      ]
     },
     {
@@ -124874,8 +21357,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10950/12318 [18:58:45<2:22:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10950/12318 [18:58:45<2:22:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134675/154911 [02:36<00:23, 878.53 examples/s]"
      ]
     },
     {
@@ -124883,8 +21365,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10951/12318 [18:58:48<2:22:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10951/12318 [18:58:48<2:22:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134770/154911 [02:37<00:22, 898.41 examples/s]"
      ]
     },
     {
@@ -124892,8 +21373,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10952/12318 [18:58:57<2:22:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10952/12318 [18:58:57<2:22:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████▉ | 134861/154911 [02:37<00:23, 870.85 examples/s]"
      ]
     },
     {
@@ -124901,8 +21381,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10953/12318 [18:59:03<2:21:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10953/12318 [18:59:03<2:21:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████ | 134999/154911 [02:37<00:19, 1014.88 examples/s]"
      ]
     },
     {
@@ -124910,8 +21389,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10954/12318 [18:59:11<2:21:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10954/12318 [18:59:11<2:21:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████ | 135149/154911 [02:37<00:17, 1154.75 examples/s]"
      ]
     },
     {
@@ -124919,8 +21397,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10955/12318 [18:59:13<2:21:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10955/12318 [18:59:13<2:21:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████ | 135288/154911 [02:37<00:16, 1221.44 examples/s]"
      ]
     },
     {
@@ -124928,7 +21405,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10956/12318 [18:59:20<2:21:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████ | 135412/154911 [02:37<00:17, 1137.29 examples/s]"
      ]
     },
     {
@@ -124936,7 +21413,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10956/12318 [18:59:20<2:21:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  87%|██████ | 135528/154911 [02:37<00:18, 1059.29 examples/s]"
      ]
     },
     {
@@ -124944,8 +21421,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10957/12318 [18:59:25<2:21:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10957/12318 [18:59:25<2:21:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|██████▏| 135636/154911 [02:37<00:19, 1002.80 examples/s]"
      ]
     },
     {
@@ -124953,8 +21429,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10958/12318 [18:59:30<2:21:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10958/12318 [18:59:30<2:21:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|██████▏| 135743/154911 [02:37<00:18, 1012.89 examples/s]"
      ]
     },
     {
@@ -124962,8 +21437,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10959/12318 [18:59:36<2:21:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10959/12318 [18:59:36<2:21:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|██████▏| 135861/154911 [02:38<00:18, 1051.85 examples/s]"
      ]
     },
     {
@@ -124971,8 +21445,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10960/12318 [18:59:41<2:21:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10960/12318 [18:59:41<2:21:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|██████▏| 135969/154911 [02:38<00:18, 1029.83 examples/s]"
      ]
     },
     {
@@ -124980,8 +21453,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10961/12318 [18:59:47<2:21:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10961/12318 [18:59:47<2:21:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136074/154911 [02:38<00:19, 967.01 examples/s]"
      ]
     },
     {
@@ -124989,8 +21461,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10962/12318 [18:59:52<2:21:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10962/12318 [18:59:52<2:21:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136172/154911 [02:38<00:21, 889.71 examples/s]"
      ]
     },
     {
@@ -124998,8 +21469,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10963/12318 [19:00:01<2:20:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10963/12318 [19:00:01<2:20:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136264/154911 [02:38<00:21, 850.58 examples/s]"
      ]
     },
     {
@@ -125007,8 +21477,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10964/12318 [19:00:06<2:20:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10964/12318 [19:00:06<2:20:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136352/154911 [02:38<00:22, 822.81 examples/s]"
      ]
     },
     {
@@ -125016,8 +21485,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10965/12318 [19:00:11<2:20:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10965/12318 [19:00:11<2:20:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136435/154911 [02:38<00:22, 822.71 examples/s]"
      ]
     },
     {
@@ -125025,8 +21493,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10966/12318 [19:00:17<2:20:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10966/12318 [19:00:17<2:20:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136520/154911 [02:38<00:22, 799.78 examples/s]"
      ]
     },
     {
@@ -125034,8 +21501,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10967/12318 [19:00:21<2:20:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10967/12318 [19:00:21<2:20:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136605/154911 [02:38<00:22, 803.28 examples/s]"
      ]
     },
     {
@@ -125043,8 +21509,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10968/12318 [19:00:28<2:20:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10968/12318 [19:00:28<2:20:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136691/154911 [02:39<00:22, 792.90 examples/s]"
      ]
     },
     {
@@ -125052,8 +21517,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10969/12318 [19:00:33<2:20:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10969/12318 [19:00:33<2:20:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136775/154911 [02:39<00:22, 805.24 examples/s]"
      ]
     },
     {
@@ -125061,8 +21525,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10970/12318 [19:00:39<2:20:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10970/12318 [19:00:39<2:20:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136857/154911 [02:39<00:22, 804.50 examples/s]"
      ]
     },
     {
@@ -125070,8 +21533,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10971/12318 [19:00:43<2:20:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10971/12318 [19:00:43<2:20:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 136947/154911 [02:39<00:21, 824.09 examples/s]"
      ]
     },
     {
@@ -125079,7 +21541,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10972/12318 [19:00:51<2:19:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  88%|███████ | 137032/154911 [02:39<00:21, 831.19 examples/s]"
      ]
     },
     {
@@ -125087,7 +21549,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10972/12318 [19:00:51<2:19:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████ | 137134/154911 [02:39<00:20, 883.61 examples/s]"
      ]
     },
     {
@@ -125095,8 +21557,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10973/12318 [19:00:56<2:19:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10973/12318 [19:00:56<2:19:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|██████▏| 137291/154911 [02:39<00:16, 1082.85 examples/s]"
      ]
     },
     {
@@ -125104,8 +21565,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10974/12318 [19:01:04<2:19:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10974/12318 [19:01:04<2:19:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|██████▏| 137401/154911 [02:39<00:16, 1074.13 examples/s]"
      ]
     },
     {
@@ -125113,8 +21573,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10975/12318 [19:01:07<2:19:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10975/12318 [19:01:07<2:19:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████ | 137509/154911 [02:39<00:17, 969.68 examples/s]"
      ]
     },
     {
@@ -125122,8 +21581,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10976/12318 [19:01:22<2:19:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10976/12318 [19:01:22<2:19:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████ | 137609/154911 [02:40<00:19, 905.62 examples/s]"
      ]
     },
     {
@@ -125131,8 +21589,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10977/12318 [19:01:28<2:19:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10977/12318 [19:01:28<2:19:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████ | 137703/154911 [02:40<00:19, 874.58 examples/s]"
      ]
     },
     {
@@ -125140,8 +21597,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10978/12318 [19:01:33<2:19:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10978/12318 [19:01:33<2:19:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████ | 137793/154911 [02:40<00:20, 834.45 examples/s]"
      ]
     },
     {
@@ -125149,8 +21605,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10979/12318 [19:01:39<2:19:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10979/12318 [19:01:39<2:19:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████ | 137898/154911 [02:40<00:19, 877.40 examples/s]"
      ]
     },
     {
@@ -125158,8 +21613,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10980/12318 [19:01:45<2:19:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10980/12318 [19:01:45<2:19:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████▏| 138015/154911 [02:40<00:17, 955.95 examples/s]"
      ]
     },
     {
@@ -125167,8 +21621,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10981/12318 [19:01:52<2:19:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10981/12318 [19:01:52<2:19:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████▏| 138130/154911 [02:40<00:16, 996.61 examples/s]"
      ]
     },
     {
@@ -125176,8 +21629,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10982/12318 [19:01:55<2:18:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10982/12318 [19:01:55<2:18:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████▏| 138232/154911 [02:40<00:17, 934.17 examples/s]"
      ]
     },
     {
@@ -125185,8 +21637,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10983/12318 [19:01:58<2:18:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10983/12318 [19:01:58<2:18:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████▏| 138329/154911 [02:40<00:18, 878.16 examples/s]"
      ]
     },
     {
@@ -125194,8 +21645,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10984/12318 [19:02:02<2:18:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10984/12318 [19:02:02<2:18:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████▏| 138428/154911 [02:40<00:18, 906.11 examples/s]"
      ]
     },
     {
@@ -125203,8 +21653,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10985/12318 [19:02:07<2:18:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10985/12318 [19:02:07<2:18:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████▏| 138521/154911 [02:41<00:18, 882.95 examples/s]"
      ]
     },
     {
@@ -125212,8 +21661,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10986/12318 [19:02:16<2:18:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10986/12318 [19:02:16<2:18:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  89%|███████▏| 138613/154911 [02:41<00:18, 885.51 examples/s]"
      ]
     },
     {
@@ -125221,8 +21669,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10987/12318 [19:02:21<2:18:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10987/12318 [19:02:21<2:18:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 138703/154911 [02:41<00:18, 888.24 examples/s]"
      ]
     },
     {
@@ -125230,8 +21677,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10988/12318 [19:02:27<2:18:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10988/12318 [19:02:27<2:18:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 138793/154911 [02:41<00:18, 875.41 examples/s]"
      ]
     },
     {
@@ -125239,8 +21685,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10989/12318 [19:02:31<2:18:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10989/12318 [19:02:31<2:18:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 138882/154911 [02:41<00:18, 873.25 examples/s]"
      ]
     },
     {
@@ -125248,8 +21693,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10990/12318 [19:02:36<2:18:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10990/12318 [19:02:36<2:18:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 138992/154911 [02:41<00:17, 932.08 examples/s]"
      ]
     },
     {
@@ -125257,8 +21701,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10991/12318 [19:02:40<2:17:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10991/12318 [19:02:40<2:17:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139086/154911 [02:41<00:17, 924.70 examples/s]"
      ]
     },
     {
@@ -125266,8 +21709,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10992/12318 [19:02:49<2:17:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10992/12318 [19:02:49<2:17:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139204/154911 [02:41<00:15, 982.85 examples/s]"
      ]
     },
     {
@@ -125275,8 +21717,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10993/12318 [19:02:55<2:17:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10993/12318 [19:02:55<2:17:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139305/154911 [02:41<00:16, 970.78 examples/s]"
      ]
     },
     {
@@ -125284,8 +21725,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10994/12318 [19:03:04<2:17:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10994/12318 [19:03:04<2:17:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139404/154911 [02:42<00:16, 912.35 examples/s]"
      ]
     },
     {
@@ -125293,8 +21733,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10995/12318 [19:03:07<2:17:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10995/12318 [19:03:07<2:17:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139504/154911 [02:42<00:16, 934.39 examples/s]"
      ]
     },
     {
@@ -125302,8 +21741,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10996/12318 [19:03:12<2:17:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10996/12318 [19:03:12<2:17:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139599/154911 [02:42<00:17, 891.53 examples/s]"
      ]
     },
     {
@@ -125311,8 +21749,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10997/12318 [19:03:18<2:17:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10997/12318 [19:03:18<2:17:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139708/154911 [02:42<00:16, 946.68 examples/s]"
      ]
     },
     {
@@ -125320,8 +21757,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10998/12318 [19:03:26<2:17:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10998/12318 [19:03:26<2:17:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139805/154911 [02:42<00:15, 950.53 examples/s]"
      ]
     },
     {
@@ -125329,8 +21765,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 10999/12318 [19:03:27<2:17:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 10999/12318 [19:03:27<2:17:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139901/154911 [02:42<00:16, 930.31 examples/s]"
      ]
     },
     {
@@ -125338,8 +21773,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11000/12318 [19:03:31<2:17:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11000/12318 [19:03:31<2:17:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 139996/154911 [02:42<00:16, 901.21 examples/s]"
      ]
     },
     {
@@ -125347,8 +21781,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11001/12318 [19:03:39<2:16:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11001/12318 [19:03:39<2:16:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 140089/154911 [02:42<00:16, 898.62 examples/s]"
      ]
     },
     {
@@ -125356,8 +21789,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11002/12318 [19:03:44<2:16:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11002/12318 [19:03:44<2:16:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  90%|███████▏| 140180/154911 [02:42<00:16, 880.60 examples/s]"
      ]
     },
     {
@@ -125365,8 +21797,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11003/12318 [19:03:46<2:16:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11003/12318 [19:03:46<2:16:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▏| 140269/154911 [02:42<00:16, 872.39 examples/s]"
      ]
     },
     {
@@ -125374,8 +21805,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11004/12318 [19:03:47<2:16:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11004/12318 [19:03:47<2:16:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▏| 140367/154911 [02:43<00:16, 899.62 examples/s]"
      ]
     },
     {
@@ -125383,8 +21813,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11005/12318 [19:03:53<2:16:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11005/12318 [19:03:53<2:16:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 140458/154911 [02:43<00:16, 877.05 examples/s]"
      ]
     },
     {
@@ -125392,8 +21821,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11006/12318 [19:04:00<2:16:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11006/12318 [19:04:00<2:16:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 140557/154911 [02:43<00:16, 889.46 examples/s]"
      ]
     },
     {
@@ -125401,8 +21829,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11007/12318 [19:04:06<2:16:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11007/12318 [19:04:06<2:16:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 140647/154911 [02:43<00:15, 891.68 examples/s]"
      ]
     },
     {
@@ -125410,8 +21837,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11008/12318 [19:04:44<2:16:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11008/12318 [19:04:44<2:16:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 140738/154911 [02:43<00:15, 895.25 examples/s]"
      ]
     },
     {
@@ -125419,8 +21845,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11009/12318 [19:04:51<2:16:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11009/12318 [19:04:51<2:16:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 140829/154911 [02:43<00:15, 880.92 examples/s]"
      ]
     },
     {
@@ -125428,8 +21853,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11010/12318 [19:04:57<2:16:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11010/12318 [19:04:57<2:16:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 140920/154911 [02:43<00:15, 874.49 examples/s]"
      ]
     },
     {
@@ -125437,8 +21861,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11011/12318 [19:04:59<2:15:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11011/12318 [19:04:59<2:15:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 141008/154911 [02:43<00:16, 860.95 examples/s]"
      ]
     },
     {
@@ -125446,8 +21869,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11012/12318 [19:05:06<2:15:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11012/12318 [19:05:06<2:15:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 141103/154911 [02:43<00:15, 883.39 examples/s]"
      ]
     },
     {
@@ -125455,8 +21877,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11013/12318 [19:05:15<2:15:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11013/12318 [19:05:15<2:15:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 141193/154911 [02:44<00:15, 859.13 examples/s]"
      ]
     },
     {
@@ -125464,8 +21885,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11014/12318 [19:05:17<2:15:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11014/12318 [19:05:17<2:15:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 141288/154911 [02:44<00:15, 885.08 examples/s]"
      ]
     },
     {
@@ -125473,8 +21893,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11015/12318 [19:05:23<2:15:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11015/12318 [19:05:23<2:15:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 141378/154911 [02:44<00:15, 873.26 examples/s]"
      ]
     },
     {
@@ -125482,8 +21901,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11016/12318 [19:05:26<2:15:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11016/12318 [19:05:26<2:15:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 141467/154911 [02:44<00:15, 859.74 examples/s]"
      ]
     },
     {
@@ -125491,8 +21909,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11017/12318 [19:05:32<2:15:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11017/12318 [19:05:32<2:15:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 141556/154911 [02:44<00:15, 864.58 examples/s]"
      ]
     },
     {
@@ -125500,8 +21917,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11018/12318 [19:05:37<2:15:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11018/12318 [19:05:37<2:15:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 141643/154911 [02:44<00:15, 843.22 examples/s]"
      ]
     },
     {
@@ -125509,8 +21925,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11019/12318 [19:05:43<2:15:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11019/12318 [19:05:43<2:15:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  91%|███████▎| 141728/154911 [02:44<00:15, 836.76 examples/s]"
      ]
     },
     {
@@ -125518,8 +21933,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11020/12318 [19:05:45<2:14:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11020/12318 [19:05:45<2:14:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 141812/154911 [02:44<00:16, 808.61 examples/s]"
      ]
     },
     {
@@ -125527,8 +21941,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11021/12318 [19:05:54<2:14:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11021/12318 [19:05:54<2:14:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 141894/154911 [02:44<00:16, 793.31 examples/s]"
      ]
     },
     {
@@ -125536,7 +21949,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11022/12318 [19:05:59<2:14:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 141975/154911 [02:44<00:16, 794.92 examples/s]"
      ]
     },
     {
@@ -125544,7 +21957,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11022/12318 [19:05:59<2:14:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142055/154911 [02:45<00:16, 794.94 examples/s]"
      ]
     },
     {
@@ -125552,8 +21965,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11023/12318 [19:06:01<2:14:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11023/12318 [19:06:01<2:14:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142135/154911 [02:45<00:16, 766.61 examples/s]"
      ]
     },
     {
@@ -125561,8 +21973,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  89%|▉| 11024/12318 [19:06:06<2:14:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  89%|▉| 11024/12318 [19:06:06<2:14:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142212/154911 [02:45<00:16, 759.64 examples/s]"
      ]
     },
     {
@@ -125570,8 +21981,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11025/12318 [19:06:12<2:14:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11025/12318 [19:06:12<2:14:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142290/154911 [02:45<00:16, 754.19 examples/s]"
      ]
     },
     {
@@ -125579,8 +21989,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11026/12318 [19:06:19<2:14:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11026/12318 [19:06:19<2:14:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142367/154911 [02:45<00:16, 755.43 examples/s]"
      ]
     },
     {
@@ -125588,8 +21997,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11027/12318 [19:06:28<2:14:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11027/12318 [19:06:28<2:14:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142445/154911 [02:45<00:16, 760.01 examples/s]"
      ]
     },
     {
@@ -125597,8 +22005,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11028/12318 [19:06:35<2:14:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11028/12318 [19:06:35<2:14:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142522/154911 [02:45<00:16, 749.81 examples/s]"
      ]
     },
     {
@@ -125606,8 +22013,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11029/12318 [19:06:36<2:14:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11029/12318 [19:06:36<2:14:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142598/154911 [02:45<00:16, 743.23 examples/s]"
      ]
     },
     {
@@ -125615,8 +22021,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11030/12318 [19:06:45<2:13:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11030/12318 [19:06:45<2:13:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142674/154911 [02:45<00:16, 739.53 examples/s]"
      ]
     },
     {
@@ -125624,8 +22029,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11031/12318 [19:06:51<2:13:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11031/12318 [19:06:51<2:13:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▎| 142749/154911 [02:46<00:16, 737.48 examples/s]"
      ]
     },
     {
@@ -125633,8 +22037,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11032/12318 [19:06:57<2:13:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11032/12318 [19:06:57<2:13:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▍| 142824/154911 [02:46<00:17, 710.62 examples/s]"
      ]
     },
     {
@@ -125642,8 +22045,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11033/12318 [19:07:05<2:13:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11033/12318 [19:07:05<2:13:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▍| 142907/154911 [02:46<00:16, 738.32 examples/s]"
      ]
     },
     {
@@ -125651,8 +22053,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11034/12318 [19:07:10<2:13:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11034/12318 [19:07:10<2:13:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▍| 142982/154911 [02:46<00:16, 738.60 examples/s]"
      ]
     },
     {
@@ -125660,8 +22061,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11035/12318 [19:07:15<2:13:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11035/12318 [19:07:15<2:13:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▍| 143065/154911 [02:46<00:15, 760.23 examples/s]"
      ]
     },
     {
@@ -125669,8 +22069,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11036/12318 [19:07:22<2:13:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11036/12318 [19:07:22<2:13:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▍| 143181/154911 [02:46<00:13, 876.11 examples/s]"
      ]
     },
     {
@@ -125678,8 +22077,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11037/12318 [19:07:30<2:13:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11037/12318 [19:07:30<2:13:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  92%|███████▍| 143272/154911 [02:46<00:13, 881.50 examples/s]"
      ]
     },
     {
@@ -125687,8 +22085,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11038/12318 [19:07:39<2:13:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11038/12318 [19:07:39<2:13:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 143362/154911 [02:46<00:14, 824.59 examples/s]"
      ]
     },
     {
@@ -125696,8 +22093,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11039/12318 [19:07:41<2:12:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11039/12318 [19:07:41<2:12:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 143447/154911 [02:46<00:15, 760.58 examples/s]"
      ]
     },
     {
@@ -125705,8 +22101,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11040/12318 [19:07:57<2:12:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11040/12318 [19:07:57<2:12:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 143525/154911 [02:47<00:15, 717.96 examples/s]"
      ]
     },
     {
@@ -125714,8 +22109,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11041/12318 [19:08:01<2:12:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11041/12318 [19:08:01<2:12:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 143599/154911 [02:47<00:15, 707.47 examples/s]"
      ]
     },
     {
@@ -125723,8 +22117,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11042/12318 [19:08:03<2:12:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11042/12318 [19:08:03<2:12:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 143672/154911 [02:47<00:15, 710.84 examples/s]"
      ]
     },
     {
@@ -125732,8 +22125,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11043/12318 [19:08:10<2:12:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11043/12318 [19:08:10<2:12:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 143745/154911 [02:47<00:16, 682.73 examples/s]"
      ]
     },
     {
@@ -125741,8 +22133,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11044/12318 [19:08:14<2:12:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11044/12318 [19:08:14<2:12:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 143814/154911 [02:47<00:16, 677.53 examples/s]"
      ]
     },
     {
@@ -125750,8 +22141,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11045/12318 [19:08:20<2:12:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11045/12318 [19:08:20<2:12:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 143887/154911 [02:47<00:16, 687.00 examples/s]"
      ]
     },
     {
@@ -125759,8 +22149,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11046/12318 [19:08:21<2:12:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11046/12318 [19:08:21<2:12:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 143958/154911 [02:47<00:16, 681.27 examples/s]"
      ]
     },
     {
@@ -125768,8 +22157,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11047/12318 [19:08:25<2:12:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11047/12318 [19:08:25<2:12:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144049/154911 [02:47<00:14, 736.58 examples/s]"
      ]
     },
     {
@@ -125777,8 +22165,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11048/12318 [19:08:32<2:12:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11048/12318 [19:08:32<2:12:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144136/154911 [02:47<00:13, 773.03 examples/s]"
      ]
     },
     {
@@ -125786,8 +22173,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11049/12318 [19:08:41<2:11:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11049/12318 [19:08:41<2:11:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144214/154911 [02:47<00:13, 772.77 examples/s]"
      ]
     },
     {
@@ -125795,7 +22181,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11050/12318 [19:08:49<2:11:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144293/154911 [02:48<00:15, 675.44 examples/s]"
      ]
     },
     {
@@ -125803,7 +22189,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11050/12318 [19:08:49<2:11:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144363/154911 [02:48<00:16, 637.20 examples/s]"
      ]
     },
     {
@@ -125811,8 +22197,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11051/12318 [19:08:57<2:11:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11051/12318 [19:08:57<2:11:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144430/154911 [02:48<00:16, 621.93 examples/s]"
      ]
     },
     {
@@ -125820,8 +22205,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11052/12318 [19:09:02<2:11:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11052/12318 [19:09:02<2:11:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144495/154911 [02:48<00:17, 609.01 examples/s]"
      ]
     },
     {
@@ -125829,8 +22213,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11053/12318 [19:09:09<2:11:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11053/12318 [19:09:09<2:11:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144558/154911 [02:48<00:17, 577.38 examples/s]"
      ]
     },
     {
@@ -125838,8 +22221,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11054/12318 [19:09:17<2:11:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11054/12318 [19:09:17<2:11:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144618/154911 [02:48<00:17, 577.91 examples/s]"
      ]
     },
     {
@@ -125847,8 +22229,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11055/12318 [19:09:22<2:11:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11055/12318 [19:09:22<2:11:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144677/154911 [02:48<00:18, 567.75 examples/s]"
      ]
     },
     {
@@ -125856,8 +22237,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11056/12318 [19:09:28<2:11:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11056/12318 [19:09:28<2:11:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144735/154911 [02:48<00:17, 566.10 examples/s]"
      ]
     },
     {
@@ -125865,8 +22245,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11057/12318 [19:09:32<2:11:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11057/12318 [19:09:32<2:11:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  93%|███████▍| 144792/154911 [02:49<00:17, 566.99 examples/s]"
      ]
     },
     {
@@ -125874,8 +22253,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11058/12318 [19:09:39<2:10:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11058/12318 [19:09:39<2:10:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▍| 144849/154911 [02:49<00:17, 560.59 examples/s]"
      ]
     },
     {
@@ -125883,8 +22261,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11059/12318 [19:09:47<2:10:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11059/12318 [19:09:47<2:10:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▍| 144906/154911 [02:49<00:18, 552.82 examples/s]"
      ]
     },
     {
@@ -125892,8 +22269,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11060/12318 [19:09:55<2:10:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11060/12318 [19:09:55<2:10:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▍| 144963/154911 [02:49<00:18, 544.76 examples/s]"
      ]
     },
     {
@@ -125901,8 +22277,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11061/12318 [19:10:01<2:10:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11061/12318 [19:10:01<2:10:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▍| 145023/154911 [02:49<00:18, 544.60 examples/s]"
      ]
     },
     {
@@ -125910,8 +22285,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11062/12318 [19:10:09<2:10:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11062/12318 [19:10:09<2:10:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▍| 145078/154911 [02:49<00:18, 543.67 examples/s]"
      ]
     },
     {
@@ -125919,8 +22293,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11063/12318 [19:10:13<2:10:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11063/12318 [19:10:13<2:10:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▍| 145140/154911 [02:49<00:17, 551.66 examples/s]"
      ]
     },
     {
@@ -125928,8 +22301,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11064/12318 [19:10:16<2:10:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11064/12318 [19:10:16<2:10:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▍| 145202/154911 [02:49<00:17, 570.66 examples/s]"
      ]
     },
     {
@@ -125937,8 +22309,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11065/12318 [19:10:21<2:10:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11065/12318 [19:10:21<2:10:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145261/154911 [02:49<00:17, 542.25 examples/s]"
      ]
     },
     {
@@ -125946,8 +22317,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11066/12318 [19:10:26<2:10:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11066/12318 [19:10:26<2:10:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145316/154911 [02:49<00:17, 540.55 examples/s]"
      ]
     },
     {
@@ -125955,8 +22325,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11067/12318 [19:10:31<2:10:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11067/12318 [19:10:31<2:10:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145372/154911 [02:50<00:17, 531.82 examples/s]"
      ]
     },
     {
@@ -125964,8 +22333,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11068/12318 [19:10:34<2:09:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11068/12318 [19:10:34<2:09:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145427/154911 [02:50<00:18, 505.71 examples/s]"
      ]
     },
     {
@@ -125973,8 +22341,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11069/12318 [19:10:37<2:09:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11069/12318 [19:10:37<2:09:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145479/154911 [02:50<00:19, 480.93 examples/s]"
      ]
     },
     {
@@ -125982,8 +22349,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11070/12318 [19:10:43<2:09:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11070/12318 [19:10:43<2:09:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145544/154911 [02:50<00:19, 478.00 examples/s]"
      ]
     },
     {
@@ -125991,8 +22357,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11071/12318 [19:10:50<2:09:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11071/12318 [19:10:50<2:09:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145602/154911 [02:50<00:18, 503.05 examples/s]"
      ]
     },
     {
@@ -126000,8 +22365,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11072/12318 [19:11:07<2:09:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11072/12318 [19:11:07<2:09:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145656/154911 [02:50<00:18, 508.92 examples/s]"
      ]
     },
     {
@@ -126009,7 +22373,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11073/12318 [19:11:10<2:09:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145711/154911 [02:50<00:18, 505.22 examples/s]"
      ]
     },
     {
@@ -126017,7 +22381,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11073/12318 [19:11:10<2:09:25,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145770/154911 [02:50<00:17, 520.98 examples/s]"
      ]
     },
     {
@@ -126025,8 +22389,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11074/12318 [19:11:13<2:09:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11074/12318 [19:11:13<2:09:19,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145823/154911 [02:51<00:17, 516.29 examples/s]"
      ]
     },
     {
@@ -126034,8 +22397,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11075/12318 [19:11:20<2:09:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11075/12318 [19:11:20<2:09:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145876/154911 [02:51<00:18, 497.68 examples/s]"
      ]
     },
     {
@@ -126043,8 +22405,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11076/12318 [19:11:22<2:09:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11076/12318 [19:11:22<2:09:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145930/154911 [02:51<00:17, 508.55 examples/s]"
      ]
     },
     {
@@ -126052,8 +22413,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11077/12318 [19:11:25<2:08:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11077/12318 [19:11:25<2:08:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 145982/154911 [02:51<00:17, 511.23 examples/s]"
      ]
     },
     {
@@ -126061,8 +22421,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11078/12318 [19:11:31<2:08:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11078/12318 [19:11:31<2:08:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 146037/154911 [02:51<00:17, 512.19 examples/s]"
      ]
     },
     {
@@ -126070,8 +22429,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11079/12318 [19:11:37<2:08:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11079/12318 [19:11:37<2:08:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 146092/154911 [02:51<00:17, 517.91 examples/s]"
      ]
     },
     {
@@ -126079,8 +22437,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11080/12318 [19:11:44<2:08:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11080/12318 [19:11:44<2:08:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 146146/154911 [02:51<00:16, 519.70 examples/s]"
      ]
     },
     {
@@ -126088,8 +22445,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11081/12318 [19:11:53<2:08:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11081/12318 [19:11:53<2:08:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 146202/154911 [02:51<00:16, 522.89 examples/s]"
      ]
     },
     {
@@ -126097,8 +22453,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11082/12318 [19:11:56<2:08:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11082/12318 [19:11:56<2:08:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 146256/154911 [02:51<00:16, 515.16 examples/s]"
      ]
     },
     {
@@ -126106,8 +22461,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11083/12318 [19:12:01<2:08:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11083/12318 [19:12:01<2:08:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 146317/154911 [02:51<00:16, 525.06 examples/s]"
      ]
     },
     {
@@ -126115,8 +22469,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11084/12318 [19:12:08<2:08:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11084/12318 [19:12:08<2:08:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  94%|███████▌| 146370/154911 [02:52<00:16, 510.42 examples/s]"
      ]
     },
     {
@@ -126124,8 +22477,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11085/12318 [19:12:12<2:08:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11085/12318 [19:12:12<2:08:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146422/154911 [02:52<00:16, 503.86 examples/s]"
      ]
     },
     {
@@ -126133,8 +22485,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11086/12318 [19:12:18<2:08:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11086/12318 [19:12:18<2:08:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146474/154911 [02:52<00:17, 476.58 examples/s]"
      ]
     },
     {
@@ -126142,8 +22493,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11087/12318 [19:12:26<2:07:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11087/12318 [19:12:26<2:07:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146530/154911 [02:52<00:17, 492.46 examples/s]"
      ]
     },
     {
@@ -126151,8 +22501,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11088/12318 [19:12:30<2:07:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11088/12318 [19:12:30<2:07:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146587/154911 [02:52<00:16, 512.05 examples/s]"
      ]
     },
     {
@@ -126160,8 +22509,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11089/12318 [19:12:36<2:07:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11089/12318 [19:12:36<2:07:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146639/154911 [02:52<00:16, 507.85 examples/s]"
      ]
     },
     {
@@ -126169,7 +22517,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11090/12318 [19:12:37<2:07:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146696/154911 [02:52<00:15, 522.76 examples/s]"
      ]
     },
     {
@@ -126177,7 +22525,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11090/12318 [19:12:37<2:07:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146749/154911 [02:52<00:15, 515.59 examples/s]"
      ]
     },
     {
@@ -126185,8 +22533,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11091/12318 [19:12:40<2:07:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11091/12318 [19:12:40<2:07:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146812/154911 [02:52<00:14, 546.95 examples/s]"
      ]
     },
     {
@@ -126194,8 +22541,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11092/12318 [19:12:43<2:07:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11092/12318 [19:12:43<2:07:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146867/154911 [02:53<00:15, 530.11 examples/s]"
      ]
     },
     {
@@ -126203,8 +22549,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11093/12318 [19:12:46<2:07:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11093/12318 [19:12:46<2:07:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146922/154911 [02:53<00:15, 511.76 examples/s]"
      ]
     },
     {
@@ -126212,8 +22557,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11094/12318 [19:12:51<2:07:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11094/12318 [19:12:51<2:07:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 146979/154911 [02:53<00:15, 520.70 examples/s]"
      ]
     },
     {
@@ -126221,8 +22565,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11095/12318 [19:12:55<2:07:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11095/12318 [19:12:55<2:07:05,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147034/154911 [02:53<00:14, 528.09 examples/s]"
      ]
     },
     {
@@ -126230,8 +22573,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11096/12318 [19:13:02<2:06:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11096/12318 [19:13:02<2:06:59,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147088/154911 [02:53<00:14, 524.99 examples/s]"
      ]
     },
     {
@@ -126239,8 +22581,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11097/12318 [19:13:10<2:06:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11097/12318 [19:13:10<2:06:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147141/154911 [02:53<00:14, 521.77 examples/s]"
      ]
     },
     {
@@ -126248,8 +22589,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11098/12318 [19:13:14<2:06:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11098/12318 [19:13:14<2:06:46,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147195/154911 [02:53<00:15, 510.57 examples/s]"
      ]
     },
     {
@@ -126257,8 +22597,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11099/12318 [19:13:22<2:06:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11099/12318 [19:13:22<2:06:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147248/154911 [02:53<00:15, 507.09 examples/s]"
      ]
     },
     {
@@ -126266,8 +22605,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11100/12318 [19:13:31<2:06:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11100/12318 [19:13:31<2:06:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147299/154911 [02:53<00:15, 500.05 examples/s]"
      ]
     },
     {
@@ -126275,8 +22613,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11101/12318 [19:13:38<2:06:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11101/12318 [19:13:38<2:06:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147353/154911 [02:53<00:14, 509.23 examples/s]"
      ]
     },
     {
@@ -126284,8 +22621,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11102/12318 [19:13:43<2:06:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11102/12318 [19:13:43<2:06:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147416/154911 [02:54<00:13, 542.81 examples/s]"
      ]
     },
     {
@@ -126293,8 +22629,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11103/12318 [19:13:49<2:06:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11103/12318 [19:13:49<2:06:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147496/154911 [02:54<00:12, 611.44 examples/s]"
      ]
     },
     {
@@ -126302,8 +22637,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11104/12318 [19:14:28<2:06:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11104/12318 [19:14:28<2:06:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147581/154911 [02:54<00:10, 669.83 examples/s]"
      ]
     },
     {
@@ -126311,8 +22645,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11105/12318 [19:14:32<2:06:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11105/12318 [19:14:32<2:06:06,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▌| 147648/154911 [02:54<00:10, 663.69 examples/s]"
      ]
     },
     {
@@ -126320,8 +22653,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11106/12318 [19:14:35<2:06:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11106/12318 [19:14:35<2:06:00,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▋| 147715/154911 [02:54<00:11, 634.71 examples/s]"
      ]
     },
     {
@@ -126329,8 +22661,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11107/12318 [19:14:43<2:05:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11107/12318 [19:14:43<2:05:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▋| 147779/154911 [02:54<00:12, 585.26 examples/s]"
      ]
     },
     {
@@ -126338,8 +22669,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11108/12318 [19:14:49<2:05:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11108/12318 [19:14:49<2:05:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▋| 147839/154911 [02:54<00:12, 574.59 examples/s]"
      ]
     },
     {
@@ -126347,8 +22677,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11109/12318 [19:14:55<2:05:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11109/12318 [19:14:55<2:05:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  95%|███████▋| 147898/154911 [02:54<00:12, 548.30 examples/s]"
      ]
     },
     {
@@ -126356,7 +22685,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11110/12318 [19:15:00<2:05:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 147954/154911 [02:54<00:12, 539.30 examples/s]"
      ]
     },
     {
@@ -126364,7 +22693,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11110/12318 [19:15:00<2:05:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148009/154911 [02:55<00:13, 524.57 examples/s]"
      ]
     },
     {
@@ -126372,8 +22701,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11111/12318 [19:15:05<2:05:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11111/12318 [19:15:05<2:05:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148063/154911 [02:55<00:13, 524.77 examples/s]"
      ]
     },
     {
@@ -126381,8 +22709,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11112/12318 [19:15:13<2:05:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11112/12318 [19:15:13<2:05:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148119/154911 [02:55<00:12, 533.57 examples/s]"
      ]
     },
     {
@@ -126390,8 +22717,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11113/12318 [19:15:14<2:05:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11113/12318 [19:15:14<2:05:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148173/154911 [02:55<00:12, 534.10 examples/s]"
      ]
     },
     {
@@ -126399,8 +22725,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11114/12318 [19:15:18<2:05:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11114/12318 [19:15:18<2:05:09,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148227/154911 [02:55<00:12, 518.28 examples/s]"
      ]
     },
     {
@@ -126408,8 +22733,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11115/12318 [19:15:20<2:05:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11115/12318 [19:15:20<2:05:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148279/154911 [02:55<00:12, 513.31 examples/s]"
      ]
     },
     {
@@ -126417,8 +22741,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11116/12318 [19:15:24<2:04:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11116/12318 [19:15:24<2:04:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148332/154911 [02:55<00:12, 510.12 examples/s]"
      ]
     },
     {
@@ -126426,8 +22749,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11117/12318 [19:15:29<2:04:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11117/12318 [19:15:29<2:04:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148384/154911 [02:55<00:12, 509.26 examples/s]"
      ]
     },
     {
@@ -126435,8 +22757,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11118/12318 [19:15:32<2:04:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11118/12318 [19:15:32<2:04:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148451/154911 [02:55<00:11, 549.49 examples/s]"
      ]
     },
     {
@@ -126444,8 +22765,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11119/12318 [19:15:40<2:04:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11119/12318 [19:15:40<2:04:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148515/154911 [02:56<00:11, 568.24 examples/s]"
      ]
     },
     {
@@ -126453,8 +22773,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11120/12318 [19:15:44<2:04:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11120/12318 [19:15:44<2:04:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148604/154911 [02:56<00:09, 644.46 examples/s]"
      ]
     },
     {
@@ -126462,8 +22781,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11121/12318 [19:15:47<2:04:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11121/12318 [19:15:47<2:04:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148685/154911 [02:56<00:09, 678.56 examples/s]"
      ]
     },
     {
@@ -126471,8 +22789,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11122/12318 [19:15:52<2:04:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11122/12318 [19:15:52<2:04:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148753/154911 [02:56<00:09, 617.59 examples/s]"
      ]
     },
     {
@@ -126480,8 +22797,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11123/12318 [19:15:57<2:04:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11123/12318 [19:15:57<2:04:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148816/154911 [02:56<00:10, 579.61 examples/s]"
      ]
     },
     {
@@ -126489,8 +22805,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11124/12318 [19:16:05<2:04:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11124/12318 [19:16:05<2:04:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148876/154911 [02:56<00:10, 552.30 examples/s]"
      ]
     },
     {
@@ -126498,8 +22813,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11125/12318 [19:16:13<2:03:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11125/12318 [19:16:13<2:03:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148932/154911 [02:56<00:10, 551.07 examples/s]"
      ]
     },
     {
@@ -126507,8 +22821,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11126/12318 [19:16:18<2:03:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11126/12318 [19:16:18<2:03:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 148989/154911 [02:56<00:11, 527.13 examples/s]"
      ]
     },
     {
@@ -126516,8 +22829,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11127/12318 [19:16:22<2:03:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11127/12318 [19:16:22<2:03:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 149047/154911 [02:56<00:11, 525.34 examples/s]"
      ]
     },
     {
@@ -126525,8 +22837,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11128/12318 [19:16:25<2:03:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11128/12318 [19:16:25<2:03:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 149101/154911 [02:57<00:11, 517.33 examples/s]"
      ]
     },
     {
@@ -126534,8 +22845,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11129/12318 [19:16:30<2:03:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11129/12318 [19:16:30<2:03:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 149153/154911 [02:57<00:11, 515.69 examples/s]"
      ]
     },
     {
@@ -126543,8 +22853,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11130/12318 [19:16:33<2:03:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11130/12318 [19:16:33<2:03:26,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 149207/154911 [02:57<00:11, 504.42 examples/s]"
      ]
     },
     {
@@ -126552,8 +22861,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11131/12318 [19:16:40<2:03:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11131/12318 [19:16:40<2:03:20,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 149259/154911 [02:57<00:11, 499.47 examples/s]"
      ]
     },
     {
@@ -126561,8 +22869,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11132/12318 [19:16:48<2:03:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11132/12318 [19:16:48<2:03:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 149315/154911 [02:57<00:10, 515.59 examples/s]"
      ]
     },
     {
@@ -126570,8 +22877,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11133/12318 [19:16:56<2:03:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11133/12318 [19:16:56<2:03:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 149367/154911 [02:57<00:10, 504.25 examples/s]"
      ]
     },
     {
@@ -126579,8 +22885,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11134/12318 [19:17:05<2:03:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11134/12318 [19:17:05<2:03:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 149418/154911 [02:57<00:11, 484.10 examples/s]"
      ]
     },
     {
@@ -126588,8 +22893,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11135/12318 [19:17:12<2:02:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11135/12318 [19:17:12<2:02:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  96%|███████▋| 149468/154911 [02:57<00:11, 467.76 examples/s]"
      ]
     },
     {
@@ -126597,8 +22901,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11136/12318 [19:17:42<2:02:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11136/12318 [19:17:42<2:02:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149515/154911 [02:57<00:12, 449.39 examples/s]"
      ]
     },
     {
@@ -126606,8 +22909,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11137/12318 [19:17:47<2:02:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11137/12318 [19:17:47<2:02:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149561/154911 [02:58<00:12, 433.77 examples/s]"
      ]
     },
     {
@@ -126615,8 +22917,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11138/12318 [19:17:54<2:02:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11138/12318 [19:17:54<2:02:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149606/154911 [02:58<00:13, 401.54 examples/s]"
      ]
     },
     {
@@ -126624,8 +22925,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11139/12318 [19:18:01<2:02:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11139/12318 [19:18:01<2:02:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149649/154911 [02:58<00:13, 400.22 examples/s]"
      ]
     },
     {
@@ -126633,8 +22933,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11140/12318 [19:18:04<2:02:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11140/12318 [19:18:04<2:02:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149694/154911 [02:58<00:12, 404.56 examples/s]"
      ]
     },
     {
@@ -126642,8 +22941,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11141/12318 [19:18:09<2:02:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11141/12318 [19:18:09<2:02:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149736/154911 [02:58<00:13, 386.98 examples/s]"
      ]
     },
     {
@@ -126651,8 +22949,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11142/12318 [19:18:15<2:02:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11142/12318 [19:18:15<2:02:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149781/154911 [02:58<00:13, 387.05 examples/s]"
      ]
     },
     {
@@ -126660,8 +22957,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11143/12318 [19:18:20<2:02:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11143/12318 [19:18:20<2:02:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149822/154911 [02:58<00:13, 386.76 examples/s]"
      ]
     },
     {
@@ -126669,8 +22965,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11144/12318 [19:18:25<2:02:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11144/12318 [19:18:25<2:02:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149862/154911 [02:58<00:13, 381.90 examples/s]"
      ]
     },
     {
@@ -126678,8 +22973,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11145/12318 [19:18:30<2:01:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11145/12318 [19:18:30<2:01:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149902/154911 [02:58<00:13, 378.43 examples/s]"
      ]
     },
     {
@@ -126687,8 +22981,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11146/12318 [19:18:39<2:01:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11146/12318 [19:18:39<2:01:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149940/154911 [02:59<00:13, 366.75 examples/s]"
      ]
     },
     {
@@ -126696,8 +22989,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  90%|▉| 11147/12318 [19:18:47<2:01:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  90%|▉| 11147/12318 [19:18:47<2:01:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 149978/154911 [02:59<00:13, 357.36 examples/s]"
      ]
     },
     {
@@ -126705,8 +22997,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11148/12318 [19:18:53<2:01:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11148/12318 [19:18:53<2:01:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 150014/154911 [02:59<00:13, 351.17 examples/s]"
      ]
     },
     {
@@ -126714,8 +23005,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11149/12318 [19:19:01<2:01:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11149/12318 [19:19:01<2:01:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▋| 150051/154911 [02:59<00:13, 350.42 examples/s]"
      ]
     },
     {
@@ -126723,8 +23013,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11150/12318 [19:19:02<2:01:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11150/12318 [19:19:02<2:01:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150089/154911 [02:59<00:13, 355.04 examples/s]"
      ]
     },
     {
@@ -126732,8 +23021,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11151/12318 [19:19:04<2:01:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11151/12318 [19:19:04<2:01:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150130/154911 [02:59<00:13, 353.22 examples/s]"
      ]
     },
     {
@@ -126741,8 +23029,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11152/12318 [19:19:09<2:01:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11152/12318 [19:19:09<2:01:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150169/154911 [02:59<00:13, 360.57 examples/s]"
      ]
     },
     {
@@ -126750,8 +23037,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11153/12318 [19:19:12<2:01:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11153/12318 [19:19:12<2:01:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150207/154911 [02:59<00:13, 343.49 examples/s]"
      ]
     },
     {
@@ -126759,8 +23045,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11154/12318 [19:19:16<2:00:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11154/12318 [19:19:16<2:00:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150251/154911 [02:59<00:12, 358.73 examples/s]"
      ]
     },
     {
@@ -126768,8 +23053,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11155/12318 [19:19:18<2:00:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11155/12318 [19:19:18<2:00:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150291/154911 [03:00<00:12, 360.07 examples/s]"
      ]
     },
     {
@@ -126777,8 +23061,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11156/12318 [19:19:25<2:00:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11156/12318 [19:19:25<2:00:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150334/154911 [03:00<00:12, 363.81 examples/s]"
      ]
     },
     {
@@ -126786,8 +23069,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11157/12318 [19:19:32<2:00:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11157/12318 [19:19:32<2:00:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150380/154911 [03:00<00:12, 377.17 examples/s]"
      ]
     },
     {
@@ -126795,8 +23077,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11158/12318 [19:19:33<2:00:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11158/12318 [19:19:33<2:00:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150418/154911 [03:00<00:12, 373.75 examples/s]"
      ]
     },
     {
@@ -126804,8 +23085,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11159/12318 [19:19:35<2:00:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11159/12318 [19:19:35<2:00:26,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150456/154911 [03:00<00:12, 353.55 examples/s]"
      ]
     },
     {
@@ -126813,8 +23093,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11160/12318 [19:19:40<2:00:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11160/12318 [19:19:40<2:00:19,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150497/154911 [03:00<00:12, 365.79 examples/s]"
      ]
     },
     {
@@ -126822,8 +23101,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11161/12318 [19:19:49<2:00:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11161/12318 [19:19:49<2:00:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150534/154911 [03:00<00:12, 360.48 examples/s]"
      ]
     },
     {
@@ -126831,8 +23109,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11162/12318 [19:19:54<2:00:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11162/12318 [19:19:54<2:00:07,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150581/154911 [03:00<00:11, 381.73 examples/s]"
      ]
     },
     {
@@ -126840,8 +23117,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11163/12318 [19:19:58<2:00:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11163/12318 [19:19:58<2:00:01,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150630/154911 [03:00<00:10, 408.80 examples/s]"
      ]
     },
     {
@@ -126849,8 +23125,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11164/12318 [19:20:03<1:59:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11164/12318 [19:20:03<1:59:54,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150676/154911 [03:01<00:10, 418.07 examples/s]"
      ]
     },
     {
@@ -126858,8 +23133,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11165/12318 [19:20:07<1:59:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11165/12318 [19:20:07<1:59:48,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150722/154911 [03:01<00:09, 420.54 examples/s]"
      ]
     },
     {
@@ -126867,8 +23141,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11166/12318 [19:20:13<1:59:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11166/12318 [19:20:13<1:59:42,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150765/154911 [03:01<00:10, 412.55 examples/s]"
      ]
     },
     {
@@ -126876,8 +23149,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11167/12318 [19:20:18<1:59:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11167/12318 [19:20:18<1:59:35,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150808/154911 [03:01<00:10, 408.14 examples/s]"
      ]
     },
     {
@@ -126885,8 +23157,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11168/12318 [19:20:58<1:59:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11168/12318 [19:20:58<1:59:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150849/154911 [03:01<00:10, 403.75 examples/s]"
      ]
     },
     {
@@ -126894,8 +23165,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11169/12318 [19:21:04<1:59:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11169/12318 [19:21:04<1:59:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150891/154911 [03:01<00:09, 405.56 examples/s]"
      ]
     },
     {
@@ -126903,8 +23173,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11170/12318 [19:21:10<1:59:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11170/12318 [19:21:10<1:59:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150932/154911 [03:01<00:10, 392.93 examples/s]"
      ]
     },
     {
@@ -126912,8 +23181,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11171/12318 [19:21:18<1:59:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11171/12318 [19:21:18<1:59:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 150973/154911 [03:01<00:09, 396.37 examples/s]"
      ]
     },
     {
@@ -126921,8 +23189,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11172/12318 [19:21:25<1:59:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11172/12318 [19:21:25<1:59:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  97%|███████▊| 151014/154911 [03:01<00:10, 382.75 examples/s]"
      ]
     },
     {
@@ -126930,8 +23197,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11173/12318 [19:21:34<1:59:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11173/12318 [19:21:34<1:59:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151054/154911 [03:02<00:10, 380.21 examples/s]"
      ]
     },
     {
@@ -126939,7 +23205,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11174/12318 [19:21:42<1:58:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151094/154911 [03:02<00:10, 380.25 examples/s]"
      ]
     },
     {
@@ -126947,7 +23213,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11174/12318 [19:21:42<1:58:56,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151133/154911 [03:02<00:09, 381.58 examples/s]"
      ]
     },
     {
@@ -126955,8 +23221,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11175/12318 [19:21:50<1:58:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11175/12318 [19:21:50<1:58:50,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151172/154911 [03:02<00:09, 381.36 examples/s]"
      ]
     },
     {
@@ -126964,8 +23229,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11176/12318 [19:21:55<1:58:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11176/12318 [19:21:55<1:58:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151211/154911 [03:02<00:10, 369.89 examples/s]"
      ]
     },
     {
@@ -126973,8 +23237,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11177/12318 [19:21:59<1:58:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11177/12318 [19:21:59<1:58:37,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151252/154911 [03:02<00:09, 379.86 examples/s]"
      ]
     },
     {
@@ -126982,8 +23245,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11178/12318 [19:22:07<1:58:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11178/12318 [19:22:07<1:58:31,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151294/154911 [03:02<00:09, 387.68 examples/s]"
      ]
     },
     {
@@ -126991,8 +23253,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11179/12318 [19:22:10<1:58:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11179/12318 [19:22:10<1:58:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151333/154911 [03:02<00:09, 381.04 examples/s]"
      ]
     },
     {
@@ -127000,8 +23261,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11180/12318 [19:22:15<1:58:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11180/12318 [19:22:15<1:58:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151373/154911 [03:02<00:09, 372.33 examples/s]"
      ]
     },
     {
@@ -127009,8 +23269,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11181/12318 [19:22:19<1:58:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11181/12318 [19:22:19<1:58:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151411/154911 [03:02<00:09, 370.31 examples/s]"
      ]
     },
     {
@@ -127018,8 +23277,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11182/12318 [19:22:21<1:58:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11182/12318 [19:22:21<1:58:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151452/154911 [03:03<00:09, 358.51 examples/s]"
      ]
     },
     {
@@ -127027,7 +23285,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11183/12318 [19:22:26<1:57:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151496/154911 [03:03<00:08, 380.83 examples/s]"
      ]
     },
     {
@@ -127035,7 +23293,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11183/12318 [19:22:26<1:57:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151536/154911 [03:03<00:09, 367.63 examples/s]"
      ]
     },
     {
@@ -127043,8 +23301,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11184/12318 [19:22:31<1:57:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11184/12318 [19:22:31<1:57:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151575/154911 [03:03<00:09, 359.20 examples/s]"
      ]
     },
     {
@@ -127052,8 +23309,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11185/12318 [19:22:39<1:57:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11185/12318 [19:22:39<1:57:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151612/154911 [03:03<00:09, 360.71 examples/s]"
      ]
     },
     {
@@ -127061,8 +23317,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11186/12318 [19:22:43<1:57:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11186/12318 [19:22:43<1:57:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151651/154911 [03:03<00:08, 364.34 examples/s]"
      ]
     },
     {
@@ -127070,8 +23325,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11187/12318 [19:22:47<1:57:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11187/12318 [19:22:47<1:57:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151691/154911 [03:03<00:08, 373.62 examples/s]"
      ]
     },
     {
@@ -127079,8 +23333,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11188/12318 [19:22:53<1:57:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11188/12318 [19:22:53<1:57:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151729/154911 [03:03<00:08, 363.83 examples/s]"
      ]
     },
     {
@@ -127088,8 +23341,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11189/12318 [19:22:58<1:57:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11189/12318 [19:22:58<1:57:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151767/154911 [03:03<00:08, 368.16 examples/s]"
      ]
     },
     {
@@ -127097,8 +23349,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11190/12318 [19:23:04<1:57:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11190/12318 [19:23:04<1:57:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151804/154911 [03:04<00:08, 366.43 examples/s]"
      ]
     },
     {
@@ -127106,8 +23357,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11191/12318 [19:23:07<1:57:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11191/12318 [19:23:07<1:57:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151843/154911 [03:04<00:08, 372.20 examples/s]"
      ]
     },
     {
@@ -127115,8 +23365,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11192/12318 [19:23:09<1:57:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11192/12318 [19:23:09<1:57:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151881/154911 [03:04<00:08, 370.93 examples/s]"
      ]
     },
     {
@@ -127124,8 +23373,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11193/12318 [19:23:17<1:56:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11193/12318 [19:23:17<1:56:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151919/154911 [03:04<00:08, 361.26 examples/s]"
      ]
     },
     {
@@ -127133,8 +23381,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11194/12318 [19:23:21<1:56:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11194/12318 [19:23:21<1:56:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151959/154911 [03:04<00:07, 370.42 examples/s]"
      ]
     },
     {
@@ -127142,8 +23389,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11195/12318 [19:23:22<1:56:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11195/12318 [19:23:22<1:56:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 151998/154911 [03:04<00:07, 372.11 examples/s]"
      ]
     },
     {
@@ -127151,8 +23397,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11196/12318 [19:23:28<1:56:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11196/12318 [19:23:28<1:56:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152037/154911 [03:04<00:07, 361.38 examples/s]"
      ]
     },
     {
@@ -127160,8 +23405,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11197/12318 [19:23:37<1:56:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11197/12318 [19:23:37<1:56:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152074/154911 [03:04<00:07, 362.82 examples/s]"
      ]
     },
     {
@@ -127169,8 +23413,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11198/12318 [19:23:43<1:56:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11198/12318 [19:23:43<1:56:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152112/154911 [03:04<00:07, 355.27 examples/s]"
      ]
     },
     {
@@ -127178,7 +23421,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11199/12318 [19:23:48<1:56:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152150/154911 [03:05<00:07, 358.52 examples/s]"
      ]
     },
     {
@@ -127186,7 +23429,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11199/12318 [19:23:48<1:56:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152190/154911 [03:05<00:07, 370.29 examples/s]"
      ]
     },
     {
@@ -127194,8 +23437,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11200/12318 [19:24:09<1:56:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11200/12318 [19:24:09<1:56:12,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152229/154911 [03:05<00:07, 364.85 examples/s]"
      ]
     },
     {
@@ -127203,8 +23445,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11201/12318 [19:24:35<1:56:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11201/12318 [19:24:35<1:56:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152267/154911 [03:05<00:07, 352.26 examples/s]"
      ]
     },
     {
@@ -127212,8 +23453,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11202/12318 [19:24:41<1:56:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11202/12318 [19:24:41<1:56:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152308/154911 [03:05<00:07, 360.87 examples/s]"
      ]
     },
     {
@@ -127221,8 +23461,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11203/12318 [19:24:49<1:55:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11203/12318 [19:24:49<1:55:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152371/154911 [03:05<00:05, 435.51 examples/s]"
      ]
     },
     {
@@ -127230,8 +23469,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11204/12318 [19:24:56<1:55:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11204/12318 [19:24:56<1:55:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▊| 152477/154911 [03:05<00:03, 609.90 examples/s]"
      ]
     },
     {
@@ -127239,8 +23477,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11205/12318 [19:25:00<1:55:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11205/12318 [19:25:00<1:55:43,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  98%|███████▉| 152540/154911 [03:05<00:03, 606.91 examples/s]"
      ]
     },
     {
@@ -127248,8 +23485,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11206/12318 [19:25:04<1:55:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11206/12318 [19:25:04<1:55:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152603/154911 [03:05<00:04, 552.07 examples/s]"
      ]
     },
     {
@@ -127257,8 +23493,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11207/12318 [19:25:06<1:55:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11207/12318 [19:25:06<1:55:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152661/154911 [03:06<00:04, 453.96 examples/s]"
      ]
     },
     {
@@ -127266,8 +23501,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11208/12318 [19:25:08<1:55:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11208/12318 [19:25:08<1:55:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152710/154911 [03:06<00:05, 420.01 examples/s]"
      ]
     },
     {
@@ -127275,8 +23509,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11209/12318 [19:25:11<1:55:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11209/12318 [19:25:11<1:55:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152756/154911 [03:06<00:05, 365.70 examples/s]"
      ]
     },
     {
@@ -127284,8 +23517,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11210/12318 [19:25:19<1:55:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11210/12318 [19:25:19<1:55:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152797/154911 [03:06<00:05, 353.18 examples/s]"
      ]
     },
     {
@@ -127293,8 +23525,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11211/12318 [19:25:24<1:55:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11211/12318 [19:25:24<1:55:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152835/154911 [03:06<00:05, 346.42 examples/s]"
      ]
     },
     {
@@ -127302,8 +23533,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11212/12318 [19:25:28<1:54:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11212/12318 [19:25:28<1:54:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152872/154911 [03:06<00:06, 329.55 examples/s]"
      ]
     },
     {
@@ -127311,8 +23541,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11213/12318 [19:25:36<1:54:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11213/12318 [19:25:36<1:54:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152906/154911 [03:06<00:06, 313.94 examples/s]"
      ]
     },
     {
@@ -127320,7 +23549,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11214/12318 [19:25:41<1:54:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152942/154911 [03:07<00:06, 308.82 examples/s]"
      ]
     },
     {
@@ -127328,7 +23557,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11214/12318 [19:25:41<1:54:45,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 152974/154911 [03:07<00:06, 311.30 examples/s]"
      ]
     },
     {
@@ -127336,8 +23565,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11215/12318 [19:25:47<1:54:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11215/12318 [19:25:47<1:54:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153007/154911 [03:07<00:06, 292.84 examples/s]"
      ]
     },
     {
@@ -127345,8 +23573,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11216/12318 [19:25:52<1:54:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11216/12318 [19:25:52<1:54:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153044/154911 [03:07<00:06, 303.35 examples/s]"
      ]
     },
     {
@@ -127354,8 +23581,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11217/12318 [19:25:59<1:54:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11217/12318 [19:25:59<1:54:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153077/154911 [03:07<00:05, 308.58 examples/s]"
      ]
     },
     {
@@ -127363,8 +23589,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11218/12318 [19:26:06<1:54:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11218/12318 [19:26:06<1:54:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153109/154911 [03:07<00:05, 300.66 examples/s]"
      ]
     },
     {
@@ -127372,8 +23597,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11219/12318 [19:26:14<1:54:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11219/12318 [19:26:14<1:54:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153143/154911 [03:07<00:05, 309.91 examples/s]"
      ]
     },
     {
@@ -127381,8 +23605,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11220/12318 [19:26:18<1:54:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11220/12318 [19:26:18<1:54:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153175/154911 [03:07<00:05, 304.08 examples/s]"
      ]
     },
     {
@@ -127390,8 +23613,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11221/12318 [19:26:23<1:54:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11221/12318 [19:26:23<1:54:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153207/154911 [03:07<00:05, 298.14 examples/s]"
      ]
     },
     {
@@ -127399,8 +23621,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11222/12318 [19:26:32<1:53:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11222/12318 [19:26:32<1:53:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153238/154911 [03:08<00:05, 288.01 examples/s]"
      ]
     },
     {
@@ -127408,8 +23629,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11223/12318 [19:26:34<1:53:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11223/12318 [19:26:34<1:53:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153267/154911 [03:08<00:05, 286.02 examples/s]"
      ]
     },
     {
@@ -127417,8 +23637,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11224/12318 [19:26:40<1:53:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11224/12318 [19:26:40<1:53:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153297/154911 [03:08<00:05, 269.71 examples/s]"
      ]
     },
     {
@@ -127426,8 +23645,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11225/12318 [19:26:48<1:53:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11225/12318 [19:26:48<1:53:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153326/154911 [03:08<00:05, 272.55 examples/s]"
      ]
     },
     {
@@ -127435,8 +23653,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11226/12318 [19:26:53<1:53:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11226/12318 [19:26:53<1:53:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153354/154911 [03:08<00:05, 270.65 examples/s]"
      ]
     },
     {
@@ -127444,8 +23661,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11227/12318 [19:27:01<1:53:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11227/12318 [19:27:01<1:53:24,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153383/154911 [03:08<00:05, 261.50 examples/s]"
      ]
     },
     {
@@ -127453,8 +23669,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11228/12318 [19:27:05<1:53:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11228/12318 [19:27:05<1:53:18,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153413/154911 [03:08<00:05, 268.64 examples/s]"
      ]
     },
     {
@@ -127462,8 +23677,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11229/12318 [19:27:12<1:53:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11229/12318 [19:27:12<1:53:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153441/154911 [03:08<00:05, 270.73 examples/s]"
      ]
     },
     {
@@ -127471,8 +23685,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11230/12318 [19:27:18<1:53:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11230/12318 [19:27:18<1:53:05,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153469/154911 [03:08<00:05, 267.01 examples/s]"
      ]
     },
     {
@@ -127480,8 +23693,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11231/12318 [19:27:23<1:52:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11231/12318 [19:27:23<1:52:59,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153501/154911 [03:09<00:05, 278.50 examples/s]"
      ]
     },
     {
@@ -127489,8 +23701,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11232/12318 [19:27:38<1:52:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11232/12318 [19:27:38<1:52:53,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153529/154911 [03:09<00:05, 266.49 examples/s]"
      ]
     },
     {
@@ -127498,8 +23709,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11233/12318 [19:27:45<1:52:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11233/12318 [19:27:45<1:52:47,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153557/154911 [03:09<00:05, 265.74 examples/s]"
      ]
     },
     {
@@ -127507,8 +23717,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11234/12318 [19:27:54<1:52:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11234/12318 [19:27:54<1:52:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153594/154911 [03:09<00:04, 293.71 examples/s]"
      ]
     },
     {
@@ -127516,8 +23725,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11235/12318 [19:27:59<1:52:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11235/12318 [19:27:59<1:52:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153624/154911 [03:09<00:04, 277.53 examples/s]"
      ]
     },
     {
@@ -127525,8 +23733,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11236/12318 [19:28:06<1:52:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11236/12318 [19:28:06<1:52:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153654/154911 [03:09<00:04, 256.34 examples/s]"
      ]
     },
     {
@@ -127534,8 +23741,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11237/12318 [19:28:11<1:52:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11237/12318 [19:28:11<1:52:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153686/154911 [03:09<00:04, 267.41 examples/s]"
      ]
     },
     {
@@ -127543,8 +23749,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11238/12318 [19:28:19<1:52:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11238/12318 [19:28:19<1:52:16,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153714/154911 [03:09<00:04, 261.61 examples/s]"
      ]
     },
     {
@@ -127552,8 +23757,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11239/12318 [19:28:23<1:52:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11239/12318 [19:28:23<1:52:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153742/154911 [03:09<00:04, 260.99 examples/s]"
      ]
     },
     {
@@ -127561,8 +23765,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11240/12318 [19:28:26<1:52:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11240/12318 [19:28:26<1:52:03,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153774/154911 [03:10<00:04, 275.35 examples/s]"
      ]
     },
     {
@@ -127570,8 +23773,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11241/12318 [19:28:33<1:51:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11241/12318 [19:28:33<1:51:57,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153803/154911 [03:10<00:04, 271.13 examples/s]"
      ]
     },
     {
@@ -127579,8 +23781,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11242/12318 [19:28:39<1:51:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11242/12318 [19:28:39<1:51:51,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153831/154911 [03:10<00:04, 265.79 examples/s]"
      ]
     },
     {
@@ -127588,8 +23789,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11243/12318 [19:28:42<1:51:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11243/12318 [19:28:42<1:51:44,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153858/154911 [03:10<00:04, 253.07 examples/s]"
      ]
     },
     {
@@ -127597,8 +23797,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11244/12318 [19:28:48<1:51:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11244/12318 [19:28:48<1:51:38,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153884/154911 [03:10<00:04, 249.11 examples/s]"
      ]
     },
     {
@@ -127606,7 +23805,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11245/12318 [19:28:55<1:51:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153911/154911 [03:10<00:03, 250.92 examples/s]"
      ]
     },
     {
@@ -127614,7 +23813,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11245/12318 [19:28:55<1:51:32,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153937/154911 [03:10<00:04, 243.15 examples/s]"
      ]
     },
     {
@@ -127622,8 +23821,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11246/12318 [19:29:03<1:51:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11246/12318 [19:29:03<1:51:26,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153963/154911 [03:10<00:03, 243.61 examples/s]"
      ]
     },
     {
@@ -127631,8 +23829,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11247/12318 [19:29:09<1:51:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11247/12318 [19:29:09<1:51:20,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 153989/154911 [03:10<00:03, 236.31 examples/s]"
      ]
     },
     {
@@ -127640,8 +23837,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11248/12318 [19:29:17<1:51:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11248/12318 [19:29:17<1:51:13,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 154017/154911 [03:11<00:03, 236.37 examples/s]"
      ]
     },
     {
@@ -127649,8 +23845,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11249/12318 [19:29:23<1:51:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11249/12318 [19:29:23<1:51:07,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 154047/154911 [03:11<00:03, 236.48 examples/s]"
      ]
     },
     {
@@ -127658,8 +23853,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11250/12318 [19:29:26<1:51:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11250/12318 [19:29:26<1:51:01,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 154074/154911 [03:11<00:03, 240.94 examples/s]"
      ]
     },
     {
@@ -127667,8 +23861,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11251/12318 [19:29:28<1:50:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11251/12318 [19:29:28<1:50:54,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 154100/154911 [03:11<00:03, 238.84 examples/s]"
      ]
     },
     {
@@ -127676,8 +23869,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11252/12318 [19:29:36<1:50:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11252/12318 [19:29:36<1:50:48,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64):  99%|███████▉| 154124/154911 [03:11<00:03, 235.18 examples/s]"
      ]
     },
     {
@@ -127685,8 +23877,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11253/12318 [19:29:38<1:50:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11253/12318 [19:29:38<1:50:41,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154152/154911 [03:11<00:03, 247.42 examples/s]"
      ]
     },
     {
@@ -127694,8 +23885,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11254/12318 [19:29:46<1:50:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11254/12318 [19:29:46<1:50:35,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154177/154911 [03:11<00:03, 241.01 examples/s]"
      ]
     },
     {
@@ -127703,8 +23893,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11255/12318 [19:29:55<1:50:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11255/12318 [19:29:55<1:50:29,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154203/154911 [03:11<00:02, 243.83 examples/s]"
      ]
     },
     {
@@ -127712,8 +23901,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11256/12318 [19:30:03<1:50:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11256/12318 [19:30:03<1:50:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154228/154911 [03:11<00:02, 236.21 examples/s]"
      ]
     },
     {
@@ -127721,8 +23909,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11257/12318 [19:30:11<1:50:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11257/12318 [19:30:11<1:50:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154252/154911 [03:12<00:02, 229.33 examples/s]"
      ]
     },
     {
@@ -127730,8 +23917,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11258/12318 [19:30:15<1:50:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11258/12318 [19:30:15<1:50:11,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154277/154911 [03:12<00:02, 225.41 examples/s]"
      ]
     },
     {
@@ -127739,8 +23925,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11259/12318 [19:30:20<1:50:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11259/12318 [19:30:20<1:50:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154305/154911 [03:12<00:02, 223.89 examples/s]"
      ]
     },
     {
@@ -127748,8 +23933,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11260/12318 [19:30:25<1:49:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11260/12318 [19:30:25<1:49:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154336/154911 [03:12<00:02, 228.57 examples/s]"
      ]
     },
     {
@@ -127757,8 +23941,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11261/12318 [19:30:31<1:49:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11261/12318 [19:30:31<1:49:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154364/154911 [03:12<00:02, 236.19 examples/s]"
      ]
     },
     {
@@ -127766,8 +23949,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11262/12318 [19:30:40<1:49:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11262/12318 [19:30:40<1:49:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154390/154911 [03:12<00:02, 236.50 examples/s]"
      ]
     },
     {
@@ -127775,8 +23957,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11263/12318 [19:30:45<1:49:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11263/12318 [19:30:45<1:49:39,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154417/154911 [03:12<00:02, 238.24 examples/s]"
      ]
     },
     {
@@ -127784,8 +23965,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11264/12318 [19:31:02<1:49:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11264/12318 [19:31:02<1:49:34,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154441/154911 [03:12<00:02, 225.42 examples/s]"
      ]
     },
     {
@@ -127793,8 +23973,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11265/12318 [19:31:07<1:49:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11265/12318 [19:31:07<1:49:28,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154464/154911 [03:12<00:02, 208.42 examples/s]"
      ]
     },
     {
@@ -127802,8 +23981,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11266/12318 [19:31:14<1:49:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11266/12318 [19:31:14<1:49:22,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154486/154911 [03:13<00:02, 205.50 examples/s]"
      ]
     },
     {
@@ -127811,8 +23989,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11267/12318 [19:31:15<1:49:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11267/12318 [19:31:15<1:49:15,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154507/154911 [03:13<00:02, 190.90 examples/s]"
      ]
     },
     {
@@ -127820,8 +23997,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11268/12318 [19:31:18<1:49:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11268/12318 [19:31:18<1:49:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154527/154911 [03:13<00:01, 192.47 examples/s]"
      ]
     },
     {
@@ -127829,8 +24005,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11269/12318 [19:31:20<1:49:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11269/12318 [19:31:20<1:49:02,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154547/154911 [03:13<00:01, 185.65 examples/s]"
      ]
     },
     {
@@ -127838,8 +24013,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  91%|▉| 11270/12318 [19:31:24<1:48:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  91%|▉| 11270/12318 [19:31:24<1:48:55,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154567/154911 [03:13<00:01, 188.52 examples/s]"
      ]
     },
     {
@@ -127847,8 +24021,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11271/12318 [19:31:26<1:48:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11271/12318 [19:31:26<1:48:49,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154611/154911 [03:13<00:01, 257.23 examples/s]"
      ]
     },
     {
@@ -127856,8 +24029,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11272/12318 [19:31:30<1:48:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11272/12318 [19:31:30<1:48:42,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154638/154911 [03:13<00:01, 202.02 examples/s]"
      ]
     },
     {
@@ -127865,8 +24037,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11273/12318 [19:31:36<1:48:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11273/12318 [19:31:36<1:48:36,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154662/154911 [03:14<00:01, 186.66 examples/s]"
      ]
     },
     {
@@ -127874,8 +24045,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11274/12318 [19:31:41<1:48:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11274/12318 [19:31:41<1:48:30,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154684/154911 [03:14<00:01, 174.17 examples/s]"
      ]
     },
     {
@@ -127883,8 +24053,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11275/12318 [19:31:48<1:48:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11275/12318 [19:31:48<1:48:23,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154703/154911 [03:14<00:01, 165.86 examples/s]"
      ]
     },
     {
@@ -127892,8 +24061,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11276/12318 [19:31:51<1:48:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11276/12318 [19:31:51<1:48:17,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154721/154911 [03:14<00:01, 155.34 examples/s]"
      ]
     },
     {
@@ -127901,8 +24069,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11277/12318 [19:31:55<1:48:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11277/12318 [19:31:55<1:48:10,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154739/154911 [03:14<00:01, 144.07 examples/s]"
      ]
     },
     {
@@ -127910,7 +24077,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11278/12318 [19:32:04<1:48:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154754/154911 [03:14<00:01, 122.30 examples/s]"
      ]
     },
     {
@@ -127918,7 +24085,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11278/12318 [19:32:04<1:48:04,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154767/154911 [03:14<00:01, 108.45 examples/s]"
      ]
     },
     {
@@ -127926,8 +24093,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11279/12318 [19:32:13<1:47:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11279/12318 [19:32:13<1:47:58,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154780/154911 [03:15<00:01, 104.69 examples/s]"
      ]
     },
     {
@@ -127935,8 +24101,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11280/12318 [19:32:16<1:47:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11280/12318 [19:32:16<1:47:52,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|███████▉| 154792/154911 [03:15<00:01, 100.81 examples/s]"
      ]
     },
     {
@@ -127944,8 +24109,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11281/12318 [19:32:21<1:47:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11281/12318 [19:32:21<1:47:46,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154804/154911 [03:15<00:01, 97.90 examples/s]"
      ]
     },
     {
@@ -127953,8 +24117,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11282/12318 [19:32:29<1:47:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11282/12318 [19:32:29<1:47:40,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154815/154911 [03:15<00:01, 92.96 examples/s]"
      ]
     },
     {
@@ -127962,8 +24125,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11283/12318 [19:32:37<1:47:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11283/12318 [19:32:37<1:47:33,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154827/154911 [03:15<00:00, 92.31 examples/s]"
      ]
     },
     {
@@ -127971,8 +24133,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11284/12318 [19:32:42<1:47:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11284/12318 [19:32:42<1:47:27,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154839/154911 [03:15<00:00, 91.67 examples/s]"
      ]
     },
     {
@@ -127980,8 +24141,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11285/12318 [19:32:46<1:47:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11285/12318 [19:32:46<1:47:21,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154851/154911 [03:15<00:00, 91.33 examples/s]"
      ]
     },
     {
@@ -127989,8 +24149,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11286/12318 [19:32:49<1:47:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11286/12318 [19:32:49<1:47:14,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154862/154911 [03:15<00:00, 88.76 examples/s]"
      ]
     },
     {
@@ -127998,8 +24157,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11287/12318 [19:32:57<1:47:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11287/12318 [19:32:57<1:47:08,  6.24s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154873/154911 [03:16<00:00, 86.89 examples/s]"
      ]
     },
     {
@@ -128007,8 +24165,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11288/12318 [19:33:00<1:47:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11288/12318 [19:33:00<1:47:02,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154882/154911 [03:16<00:00, 77.72 examples/s]"
      ]
     },
     {
@@ -128016,8 +24173,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11289/12318 [19:33:05<1:46:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11289/12318 [19:33:05<1:46:55,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154890/154911 [03:16<00:00, 72.83 examples/s]"
      ]
     },
     {
@@ -128025,8 +24181,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11290/12318 [19:33:12<1:46:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11290/12318 [19:33:12<1:46:49,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154898/154911 [03:16<00:00, 69.28 examples/s]"
      ]
     },
     {
@@ -128034,8 +24189,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11291/12318 [19:33:16<1:46:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11291/12318 [19:33:16<1:46:43,  6.23s/it, v_num=e4xv, train/los"
+      "Map (num_proc=64): 100%|████████▉| 154906/154911 [03:16<00:00, 51.75 examples/s]"
      ]
     },
     {
@@ -128043,8 +24197,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11292/12318 [19:33:24<1:46:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11292/12318 [19:33:24<1:46:37,  6.23s/it, v_num=e4xv, train/los"
+      "                                                                                \r"
      ]
     },
     {
@@ -128052,8 +24205,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11293/12318 [19:33:25<1:46:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11293/12318 [19:33:25<1:46:30,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   0%|                   | 0/154911 [00:00<?, ? examples/s]"
      ]
     },
     {
@@ -128061,8 +24213,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11294/12318 [19:33:27<1:46:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11294/12318 [19:33:27<1:46:23,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   1%|       | 1000/154911 [00:02<05:34, 459.54 examples/s]"
      ]
     },
     {
@@ -128070,8 +24221,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11295/12318 [19:33:32<1:46:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11295/12318 [19:33:32<1:46:17,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   1%|       | 2000/154911 [00:02<03:19, 768.19 examples/s]"
      ]
     },
     {
@@ -128079,8 +24229,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11296/12318 [19:34:13<1:46:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11296/12318 [19:34:13<1:46:14,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   2%|▏      | 3000/154911 [00:03<03:00, 843.07 examples/s]"
      ]
     },
     {
@@ -128088,8 +24237,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11297/12318 [19:34:20<1:46:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11297/12318 [19:34:20<1:46:08,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   3%|▏      | 4000/154911 [00:05<03:01, 831.94 examples/s]"
      ]
     },
     {
@@ -128097,8 +24245,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11298/12318 [19:34:25<1:46:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11298/12318 [19:34:25<1:46:01,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   3%|▏     | 5000/154911 [00:05<02:01, 1230.93 examples/s]"
      ]
     },
     {
@@ -128106,8 +24253,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11299/12318 [19:34:31<1:45:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11299/12318 [19:34:31<1:45:55,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   4%|▏     | 6000/154911 [00:05<01:35, 1564.57 examples/s]"
      ]
     },
     {
@@ -128115,8 +24261,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11300/12318 [19:34:38<1:45:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11300/12318 [19:34:38<1:45:49,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   5%|▎     | 7000/154911 [00:05<01:20, 1838.62 examples/s]"
      ]
     },
     {
@@ -128124,8 +24269,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11301/12318 [19:34:44<1:45:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11301/12318 [19:34:44<1:45:43,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   5%|▎     | 8000/154911 [00:06<01:16, 1909.02 examples/s]"
      ]
     },
     {
@@ -128133,8 +24277,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11302/12318 [19:34:47<1:45:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11302/12318 [19:34:47<1:45:36,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   6%|▎    | 10000/154911 [00:06<00:53, 2694.55 examples/s]"
      ]
     },
     {
@@ -128142,8 +24285,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11303/12318 [19:34:54<1:45:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11303/12318 [19:34:54<1:45:30,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   7%|▎    | 11000/154911 [00:07<00:51, 2815.55 examples/s]"
      ]
     },
     {
@@ -128151,8 +24293,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11304/12318 [19:34:59<1:45:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11304/12318 [19:34:59<1:45:23,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   7%|▎    | 11420/154911 [00:07<00:59, 2403.24 examples/s]"
      ]
     },
     {
@@ -128160,8 +24301,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11305/12318 [19:35:04<1:45:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11305/12318 [19:35:04<1:45:17,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   8%|▍    | 12420/154911 [00:08<01:06, 2150.92 examples/s]"
      ]
     },
     {
@@ -128169,8 +24309,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11306/12318 [19:35:10<1:45:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11306/12318 [19:35:10<1:45:11,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   9%|▍    | 13420/154911 [00:08<00:55, 2565.24 examples/s]"
      ]
     },
     {
@@ -128178,8 +24317,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11307/12318 [19:35:17<1:45:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11307/12318 [19:35:17<1:45:05,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):   9%|▍    | 14420/154911 [00:08<00:45, 3072.77 examples/s]"
      ]
     },
     {
@@ -128187,8 +24325,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11308/12318 [19:35:18<1:44:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11308/12318 [19:35:18<1:44:58,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  10%|▍    | 15420/154911 [00:08<00:46, 3015.70 examples/s]"
      ]
     },
     {
@@ -128196,8 +24333,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11309/12318 [19:35:27<1:44:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11309/12318 [19:35:27<1:44:52,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  11%|▌    | 16420/154911 [00:08<00:40, 3433.18 examples/s]"
      ]
     },
     {
@@ -128205,8 +24341,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11310/12318 [19:35:30<1:44:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11310/12318 [19:35:30<1:44:46,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  11%|▌    | 17420/154911 [00:09<00:38, 3580.05 examples/s]"
      ]
     },
     {
@@ -128214,8 +24349,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11311/12318 [19:35:35<1:44:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11311/12318 [19:35:35<1:44:39,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  12%|▌    | 18420/154911 [00:09<00:31, 4288.64 examples/s]"
      ]
     },
     {
@@ -128223,8 +24357,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11312/12318 [19:35:36<1:44:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11312/12318 [19:35:36<1:44:32,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  13%|▋    | 19420/154911 [00:09<00:26, 5061.81 examples/s]"
      ]
     },
     {
@@ -128232,8 +24365,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11313/12318 [19:35:40<1:44:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11313/12318 [19:35:40<1:44:26,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  13%|▋    | 20841/154911 [00:09<00:26, 5127.70 examples/s]"
      ]
     },
     {
@@ -128241,8 +24373,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11314/12318 [19:35:44<1:44:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11314/12318 [19:35:44<1:44:20,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  14%|▋    | 21841/154911 [00:10<00:40, 3288.37 examples/s]"
      ]
     },
     {
@@ -128250,8 +24381,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11315/12318 [19:35:48<1:44:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11315/12318 [19:35:48<1:44:13,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  15%|▋    | 22841/154911 [00:10<00:37, 3486.32 examples/s]"
      ]
     },
     {
@@ -128259,8 +24389,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11316/12318 [19:35:55<1:44:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11316/12318 [19:35:55<1:44:07,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  16%|▊    | 24261/154911 [00:10<00:36, 3599.36 examples/s]"
      ]
     },
     {
@@ -128268,8 +24397,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11317/12318 [19:35:57<1:44:00,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11317/12318 [19:35:57<1:44:00,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  17%|▊    | 26261/154911 [00:11<00:28, 4591.92 examples/s]"
      ]
     },
     {
@@ -128277,8 +24405,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11318/12318 [19:36:04<1:43:54,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11318/12318 [19:36:04<1:43:54,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  18%|▉    | 27261/154911 [00:11<00:28, 4413.84 examples/s]"
      ]
     },
     {
@@ -128286,8 +24413,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11319/12318 [19:36:10<1:43:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11319/12318 [19:36:10<1:43:48,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  19%|▉    | 28682/154911 [00:12<00:36, 3468.01 examples/s]"
      ]
     },
     {
@@ -128295,8 +24421,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11320/12318 [19:36:15<1:43:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11320/12318 [19:36:15<1:43:42,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  20%|▉    | 30682/154911 [00:12<00:28, 4338.27 examples/s]"
      ]
     },
     {
@@ -128304,8 +24429,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11321/12318 [19:36:16<1:43:35,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11321/12318 [19:36:16<1:43:35,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  20%|█    | 31682/154911 [00:12<00:28, 4346.60 examples/s]"
      ]
     },
     {
@@ -128313,7 +24437,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11322/12318 [19:36:24<1:43:29,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  21%|█    | 33103/154911 [00:12<00:27, 4367.69 examples/s]"
      ]
     },
     {
@@ -128321,7 +24445,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11322/12318 [19:36:24<1:43:29,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  22%|█    | 34103/154911 [00:13<00:29, 4162.52 examples/s]"
      ]
     },
     {
@@ -128329,8 +24453,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11323/12318 [19:36:32<1:43:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11323/12318 [19:36:32<1:43:23,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  23%|█▏   | 35524/154911 [00:13<00:24, 4793.58 examples/s]"
      ]
     },
     {
@@ -128338,8 +24461,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11324/12318 [19:36:39<1:43:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11324/12318 [19:36:39<1:43:17,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  24%|█▏   | 36524/154911 [00:13<00:33, 3579.17 examples/s]"
      ]
     },
     {
@@ -128347,8 +24469,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11325/12318 [19:36:47<1:43:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11325/12318 [19:36:47<1:43:11,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  24%|█▏   | 37524/154911 [00:14<00:30, 3848.29 examples/s]"
      ]
     },
     {
@@ -128356,8 +24477,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11326/12318 [19:36:49<1:43:04,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11326/12318 [19:36:49<1:43:04,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  25%|█▏   | 38524/154911 [00:14<00:30, 3828.52 examples/s]"
      ]
     },
     {
@@ -128365,8 +24485,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11327/12318 [19:36:57<1:42:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11327/12318 [19:36:57<1:42:58,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  26%|█▎   | 39524/154911 [00:14<00:25, 4601.80 examples/s]"
      ]
     },
     {
@@ -128374,8 +24493,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11328/12318 [19:37:41<1:42:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11328/12318 [19:37:41<1:42:55,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  26%|█▎   | 40524/154911 [00:14<00:26, 4377.13 examples/s]"
      ]
     },
     {
@@ -128383,7 +24501,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11329/12318 [19:37:49<1:42:49,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  27%|█▎   | 41945/154911 [00:15<00:29, 3857.71 examples/s]"
      ]
     },
     {
@@ -128391,7 +24509,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11329/12318 [19:37:49<1:42:49,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  28%|█▍   | 43945/154911 [00:15<00:21, 5124.75 examples/s]"
      ]
     },
     {
@@ -128399,8 +24517,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11330/12318 [19:37:52<1:42:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11330/12318 [19:37:52<1:42:42,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  30%|█▍   | 45945/154911 [00:15<00:16, 6441.62 examples/s]"
      ]
     },
     {
@@ -128408,8 +24525,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11331/12318 [19:37:59<1:42:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11331/12318 [19:37:59<1:42:36,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  31%|█▌   | 47365/154911 [00:15<00:15, 6913.73 examples/s]"
      ]
     },
     {
@@ -128417,8 +24533,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11332/12318 [19:38:07<1:42:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11332/12318 [19:38:07<1:42:30,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  31%|█▌   | 48786/154911 [00:16<00:27, 3802.25 examples/s]"
      ]
     },
     {
@@ -128426,8 +24541,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11333/12318 [19:38:12<1:42:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11333/12318 [19:38:12<1:42:24,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  32%|█▌   | 50207/154911 [00:16<00:24, 4237.32 examples/s]"
      ]
     },
     {
@@ -128435,8 +24549,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11334/12318 [19:38:20<1:42:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11334/12318 [19:38:20<1:42:18,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  33%|█▋   | 51628/154911 [00:17<00:24, 4197.04 examples/s]"
      ]
     },
     {
@@ -128444,8 +24557,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11335/12318 [19:38:25<1:42:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11335/12318 [19:38:25<1:42:11,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  34%|█▋   | 53049/154911 [00:17<00:28, 3582.19 examples/s]"
      ]
     },
     {
@@ -128453,8 +24565,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11336/12318 [19:38:28<1:42:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11336/12318 [19:38:28<1:42:05,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  36%|█▊   | 56470/154911 [00:17<00:18, 5290.92 examples/s]"
      ]
     },
     {
@@ -128462,8 +24573,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11337/12318 [19:38:29<1:41:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11337/12318 [19:38:29<1:41:58,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  37%|█▊   | 57470/154911 [00:18<00:19, 4986.39 examples/s]"
      ]
     },
     {
@@ -128471,8 +24581,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11338/12318 [19:38:32<1:41:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11338/12318 [19:38:32<1:41:52,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  38%|█▉   | 59470/154911 [00:18<00:18, 5051.94 examples/s]"
      ]
     },
     {
@@ -128480,8 +24589,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11339/12318 [19:38:35<1:41:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11339/12318 [19:38:35<1:41:45,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  40%|█▉   | 61470/154911 [00:18<00:15, 6137.05 examples/s]"
      ]
     },
     {
@@ -128489,8 +24597,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11340/12318 [19:38:41<1:41:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11340/12318 [19:38:41<1:41:39,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  41%|██   | 62891/154911 [00:19<00:15, 5899.43 examples/s]"
      ]
     },
     {
@@ -128498,8 +24605,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11341/12318 [19:38:42<1:41:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11341/12318 [19:38:42<1:41:32,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  41%|██   | 63891/154911 [00:19<00:16, 5461.65 examples/s]"
      ]
     },
     {
@@ -128507,8 +24613,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11342/12318 [19:38:45<1:41:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11342/12318 [19:38:45<1:41:26,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  42%|██   | 64891/154911 [00:19<00:15, 5919.92 examples/s]"
      ]
     },
     {
@@ -128516,8 +24621,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11343/12318 [19:38:53<1:41:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11343/12318 [19:38:53<1:41:19,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  43%|██▏  | 65891/154911 [00:19<00:14, 6337.39 examples/s]"
      ]
     },
     {
@@ -128525,8 +24629,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11344/12318 [19:38:55<1:41:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11344/12318 [19:38:55<1:41:13,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  44%|██▏  | 68312/154911 [00:20<00:23, 3676.07 examples/s]"
      ]
     },
     {
@@ -128534,8 +24637,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11345/12318 [19:39:01<1:41:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11345/12318 [19:39:01<1:41:07,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  45%|██▎  | 70312/154911 [00:20<00:16, 5093.36 examples/s]"
      ]
     },
     {
@@ -128543,7 +24645,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11346/12318 [19:39:06<1:41:00,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  46%|██▎  | 71733/154911 [00:21<00:18, 4426.84 examples/s]"
      ]
     },
     {
@@ -128551,7 +24653,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11346/12318 [19:39:06<1:41:00,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  48%|██▍  | 73733/154911 [00:21<00:15, 5226.79 examples/s]"
      ]
     },
     {
@@ -128559,8 +24661,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11347/12318 [19:39:14<1:40:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11347/12318 [19:39:14<1:40:54,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  49%|██▍  | 76154/154911 [00:22<00:20, 3878.81 examples/s]"
      ]
     },
     {
@@ -128568,8 +24669,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11348/12318 [19:39:19<1:40:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11348/12318 [19:39:19<1:40:48,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  50%|██▍  | 77154/154911 [00:22<00:18, 4303.67 examples/s]"
      ]
     },
     {
@@ -128577,8 +24677,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11349/12318 [19:39:22<1:40:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11349/12318 [19:39:22<1:40:41,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  51%|██▌  | 79154/154911 [00:22<00:15, 4924.90 examples/s]"
      ]
     },
     {
@@ -128586,8 +24685,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11350/12318 [19:39:28<1:40:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11350/12318 [19:39:28<1:40:35,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  52%|██▌  | 80154/154911 [00:22<00:16, 4445.17 examples/s]"
      ]
     },
     {
@@ -128595,8 +24693,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11351/12318 [19:39:35<1:40:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11351/12318 [19:39:35<1:40:29,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  53%|██▋  | 82154/154911 [00:23<00:12, 5887.48 examples/s]"
      ]
     },
     {
@@ -128604,8 +24701,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11352/12318 [19:39:39<1:40:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11352/12318 [19:39:39<1:40:22,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  54%|██▋  | 83154/154911 [00:23<00:12, 5901.77 examples/s]"
      ]
     },
     {
@@ -128613,7 +24709,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11353/12318 [19:39:43<1:40:16,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  57%|██▊  | 87575/154911 [00:23<00:07, 8819.98 examples/s]"
      ]
     },
     {
@@ -128621,7 +24717,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11353/12318 [19:39:43<1:40:16,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  58%|██▉  | 89417/154911 [00:24<00:13, 4843.40 examples/s]"
      ]
     },
     {
@@ -128629,8 +24725,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11354/12318 [19:39:46<1:40:10,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11354/12318 [19:39:46<1:40:10,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  59%|██▉  | 90838/154911 [00:24<00:13, 4792.61 examples/s]"
      ]
     },
     {
@@ -128638,8 +24733,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11355/12318 [19:39:49<1:40:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11355/12318 [19:39:49<1:40:03,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  59%|██▉  | 91838/154911 [00:24<00:12, 4998.91 examples/s]"
      ]
     },
     {
@@ -128647,8 +24741,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11356/12318 [19:39:53<1:39:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11356/12318 [19:39:53<1:39:57,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  60%|███  | 93259/154911 [00:25<00:12, 5025.16 examples/s]"
      ]
     },
     {
@@ -128656,8 +24749,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11357/12318 [19:39:57<1:39:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11357/12318 [19:39:57<1:39:50,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  61%|███  | 94259/154911 [00:25<00:10, 5567.34 examples/s]"
      ]
     },
     {
@@ -128665,8 +24757,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11358/12318 [19:40:00<1:39:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11358/12318 [19:40:00<1:39:44,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  61%|███  | 95259/154911 [00:25<00:10, 5698.80 examples/s]"
      ]
     },
     {
@@ -128674,8 +24765,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11359/12318 [19:40:07<1:39:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11359/12318 [19:40:07<1:39:38,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  62%|███  | 96259/154911 [00:25<00:13, 4473.50 examples/s]"
      ]
     },
     {
@@ -128683,8 +24773,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11360/12318 [19:41:00<1:39:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11360/12318 [19:41:00<1:39:35,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  63%|███▏ | 97259/154911 [00:26<00:15, 3832.45 examples/s]"
      ]
     },
     {
@@ -128692,8 +24781,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11361/12318 [19:41:02<1:39:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11361/12318 [19:41:02<1:39:29,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  63%|███▏ | 98259/154911 [00:26<00:14, 3803.09 examples/s]"
      ]
     },
     {
@@ -128701,8 +24789,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11362/12318 [19:41:03<1:39:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11362/12318 [19:41:03<1:39:22,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  64%|███▏ | 99101/154911 [00:26<00:14, 3929.82 examples/s]"
      ]
     },
     {
@@ -128710,8 +24797,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11363/12318 [19:41:06<1:39:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11363/12318 [19:41:06<1:39:15,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  65%|██▌ | 100101/154911 [00:26<00:13, 4024.31 examples/s]"
      ]
     },
     {
@@ -128719,8 +24805,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11364/12318 [19:41:12<1:39:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11364/12318 [19:41:12<1:39:09,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  65%|██▌ | 101101/154911 [00:27<00:11, 4763.93 examples/s]"
      ]
     },
     {
@@ -128728,8 +24813,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11365/12318 [19:41:14<1:39:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11365/12318 [19:41:14<1:39:03,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  66%|██▋ | 102522/154911 [00:27<00:10, 4970.36 examples/s]"
      ]
     },
     {
@@ -128737,8 +24821,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11366/12318 [19:41:16<1:38:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11366/12318 [19:41:16<1:38:56,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  67%|██▋ | 103943/154911 [00:27<00:11, 4356.11 examples/s]"
      ]
     },
     {
@@ -128746,8 +24829,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11367/12318 [19:41:22<1:38:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11367/12318 [19:41:22<1:38:50,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  68%|██▋ | 104943/154911 [00:27<00:10, 4792.70 examples/s]"
      ]
     },
     {
@@ -128755,8 +24837,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11368/12318 [19:41:24<1:38:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11368/12318 [19:41:24<1:38:43,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  68%|██▋ | 105943/154911 [00:28<00:16, 2900.23 examples/s]"
      ]
     },
     {
@@ -128764,8 +24845,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11369/12318 [19:41:31<1:38:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11369/12318 [19:41:31<1:38:37,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  69%|██▊ | 106943/154911 [00:28<00:13, 3609.60 examples/s]"
      ]
     },
     {
@@ -128773,8 +24853,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11370/12318 [19:41:35<1:38:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11370/12318 [19:41:35<1:38:31,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  70%|██▊ | 107943/154911 [00:28<00:12, 3629.47 examples/s]"
      ]
     },
     {
@@ -128782,8 +24861,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11371/12318 [19:41:42<1:38:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11371/12318 [19:41:42<1:38:24,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  71%|██▊ | 109943/154911 [00:29<00:08, 5374.89 examples/s]"
      ]
     },
     {
@@ -128791,8 +24869,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11372/12318 [19:41:48<1:38:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11372/12318 [19:41:48<1:38:18,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  72%|██▊ | 110785/154911 [00:29<00:15, 2815.56 examples/s]"
      ]
     },
     {
@@ -128800,8 +24877,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11373/12318 [19:41:57<1:38:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11373/12318 [19:41:57<1:38:12,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  72%|██▉ | 112206/154911 [00:30<00:12, 3336.85 examples/s]"
      ]
     },
     {
@@ -128809,8 +24885,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11374/12318 [19:42:03<1:38:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11374/12318 [19:42:03<1:38:06,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  73%|██▉ | 113627/154911 [00:30<00:14, 2808.32 examples/s]"
      ]
     },
     {
@@ -128818,8 +24893,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11375/12318 [19:42:09<1:38:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11375/12318 [19:42:09<1:38:00,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  75%|██▉ | 115627/154911 [00:30<00:09, 4182.60 examples/s]"
      ]
     },
     {
@@ -128827,8 +24901,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11376/12318 [19:42:12<1:37:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11376/12318 [19:42:12<1:37:53,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  76%|███ | 117048/154911 [00:32<00:14, 2626.99 examples/s]"
      ]
     },
     {
@@ -128836,8 +24909,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11377/12318 [19:42:21<1:37:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11377/12318 [19:42:21<1:37:47,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  76%|███ | 118469/154911 [00:32<00:13, 2762.05 examples/s]"
      ]
     },
     {
@@ -128845,8 +24917,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11378/12318 [19:42:29<1:37:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11378/12318 [19:42:29<1:37:41,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  77%|███ | 119311/154911 [00:32<00:14, 2433.37 examples/s]"
      ]
     },
     {
@@ -128854,8 +24925,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11379/12318 [19:42:32<1:37:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11379/12318 [19:42:32<1:37:34,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  78%|███ | 120311/154911 [00:33<00:12, 2864.96 examples/s]"
      ]
     },
     {
@@ -128863,8 +24933,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11380/12318 [19:42:35<1:37:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11380/12318 [19:42:35<1:37:28,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  78%|███▏| 121311/154911 [00:33<00:11, 2871.15 examples/s]"
      ]
     },
     {
@@ -128872,8 +24941,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11381/12318 [19:42:38<1:37:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11381/12318 [19:42:38<1:37:22,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  79%|███▏| 122311/154911 [00:33<00:11, 2923.96 examples/s]"
      ]
     },
     {
@@ -128881,8 +24949,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11382/12318 [19:42:44<1:37:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11382/12318 [19:42:44<1:37:15,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  80%|███▏| 123731/154911 [00:34<00:09, 3353.62 examples/s]"
      ]
     },
     {
@@ -128890,8 +24957,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11383/12318 [19:42:51<1:37:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11383/12318 [19:42:51<1:37:09,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  80%|███▏| 124571/154911 [00:34<00:10, 2812.12 examples/s]"
      ]
     },
     {
@@ -128899,8 +24965,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11384/12318 [19:42:58<1:37:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11384/12318 [19:42:58<1:37:03,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  81%|███▏| 125571/154911 [00:35<00:11, 2547.54 examples/s]"
      ]
     },
     {
@@ -128908,8 +24973,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11385/12318 [19:42:59<1:36:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11385/12318 [19:42:59<1:36:56,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  81%|███▎| 125991/154911 [00:35<00:11, 2559.31 examples/s]"
      ]
     },
     {
@@ -128917,8 +24981,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11386/12318 [19:43:03<1:36:50,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11386/12318 [19:43:03<1:36:50,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  82%|███▎| 126411/154911 [00:35<00:12, 2300.16 examples/s]"
      ]
     },
     {
@@ -128926,8 +24989,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11387/12318 [19:43:12<1:36:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11387/12318 [19:43:12<1:36:44,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  82%|███▎| 127411/154911 [00:35<00:09, 2837.04 examples/s]"
      ]
     },
     {
@@ -128935,7 +24997,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11388/12318 [19:43:17<1:36:38,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  83%|███▎| 128831/154911 [00:36<00:08, 2908.31 examples/s]"
      ]
     },
     {
@@ -128943,7 +25005,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11388/12318 [19:43:17<1:36:38,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  84%|███▎| 130251/154911 [00:36<00:07, 3296.82 examples/s]"
      ]
     },
     {
@@ -128951,8 +25013,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11389/12318 [19:43:25<1:36:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11389/12318 [19:43:25<1:36:31,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  85%|███▍| 132251/154911 [00:36<00:04, 4780.57 examples/s]"
      ]
     },
     {
@@ -128960,8 +25021,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11390/12318 [19:43:31<1:36:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11390/12318 [19:43:31<1:36:25,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  87%|███▍| 134251/154911 [00:36<00:03, 6073.59 examples/s]"
      ]
     },
     {
@@ -128969,8 +25029,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11391/12318 [19:43:34<1:36:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11391/12318 [19:43:34<1:36:19,  6.23s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  87%|███▍| 135091/154911 [00:37<00:04, 4492.84 examples/s]"
      ]
     },
     {
@@ -128978,8 +25037,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11392/12318 [19:44:23<1:36:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11392/12318 [19:44:23<1:36:16,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  88%|███▌| 135931/154911 [00:38<00:10, 1811.01 examples/s]"
      ]
     },
     {
@@ -128987,8 +25045,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11393/12318 [19:44:28<1:36:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11393/12318 [19:44:28<1:36:10,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  89%|███▌| 137931/154911 [00:39<00:07, 2420.72 examples/s]"
      ]
     },
     {
@@ -128996,8 +25053,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  92%|▉| 11394/12318 [19:44:37<1:36:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  92%|▉| 11394/12318 [19:44:37<1:36:04,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  90%|███▌| 138931/154911 [00:39<00:05, 2671.36 examples/s]"
      ]
     },
     {
@@ -129005,8 +25061,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11395/12318 [19:44:39<1:35:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11395/12318 [19:44:39<1:35:57,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  90%|███▌| 139771/154911 [00:39<00:06, 2339.05 examples/s]"
      ]
     },
     {
@@ -129014,8 +25069,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11396/12318 [19:44:43<1:35:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11396/12318 [19:44:43<1:35:51,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  91%|███▋| 140771/154911 [00:40<00:05, 2768.89 examples/s]"
      ]
     },
     {
@@ -129023,8 +25077,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11397/12318 [19:44:44<1:35:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11397/12318 [19:44:44<1:35:44,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  92%|███▋| 141771/154911 [00:40<00:03, 3287.23 examples/s]"
      ]
     },
     {
@@ -129032,8 +25085,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11398/12318 [19:44:48<1:35:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11398/12318 [19:44:48<1:35:37,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  92%|███▋| 142611/154911 [00:41<00:06, 1888.66 examples/s]"
      ]
     },
     {
@@ -129041,8 +25093,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11399/12318 [19:44:56<1:35:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11399/12318 [19:44:56<1:35:31,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  93%|███▋| 143611/154911 [00:41<00:04, 2359.89 examples/s]"
      ]
     },
     {
@@ -129050,8 +25101,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11400/12318 [19:45:00<1:35:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11400/12318 [19:45:00<1:35:25,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  94%|███▋| 145031/154911 [00:41<00:03, 3051.32 examples/s]"
      ]
     },
     {
@@ -129059,8 +25109,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11401/12318 [19:45:04<1:35:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11401/12318 [19:45:04<1:35:19,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  95%|███▊| 146451/154911 [00:41<00:02, 3628.27 examples/s]"
      ]
     },
     {
@@ -129068,8 +25117,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11402/12318 [19:45:13<1:35:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11402/12318 [19:45:13<1:35:13,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  95%|███▊| 147291/154911 [00:42<00:02, 3346.41 examples/s]"
      ]
     },
     {
@@ -129077,8 +25125,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11403/12318 [19:45:15<1:35:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11403/12318 [19:45:15<1:35:06,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  96%|███▊| 148291/154911 [00:42<00:02, 3125.58 examples/s]"
      ]
     },
     {
@@ -129086,8 +25133,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11404/12318 [19:45:23<1:35:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11404/12318 [19:45:23<1:35:00,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  96%|███▊| 149291/154911 [00:43<00:01, 3112.35 examples/s]"
      ]
     },
     {
@@ -129095,8 +25141,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11405/12318 [19:45:28<1:34:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11405/12318 [19:45:28<1:34:54,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  97%|███▉| 150291/154911 [00:43<00:01, 2604.11 examples/s]"
      ]
     },
     {
@@ -129104,8 +25149,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11406/12318 [19:45:34<1:34:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11406/12318 [19:45:34<1:34:47,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  97%|███▉| 150711/154911 [00:44<00:03, 1288.88 examples/s]"
      ]
     },
     {
@@ -129113,8 +25157,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11407/12318 [19:45:35<1:34:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11407/12318 [19:45:35<1:34:41,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  98%|███▉| 151131/154911 [00:45<00:02, 1330.86 examples/s]"
      ]
     },
     {
@@ -129122,8 +25165,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11408/12318 [19:45:39<1:34:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11408/12318 [19:45:39<1:34:34,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  98%|███▉| 151551/154911 [00:45<00:02, 1451.03 examples/s]"
      ]
     },
     {
@@ -129131,8 +25173,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11409/12318 [19:45:47<1:34:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11409/12318 [19:45:47<1:34:28,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  98%|███▉| 151971/154911 [00:45<00:02, 1203.08 examples/s]"
      ]
     },
     {
@@ -129140,8 +25181,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11410/12318 [19:45:54<1:34:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11410/12318 [19:45:54<1:34:22,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  98%|███▉| 152391/154911 [00:45<00:01, 1391.30 examples/s]"
      ]
     },
     {
@@ -129149,8 +25189,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11411/12318 [19:46:02<1:34:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11411/12318 [19:46:02<1:34:16,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  99%|████▉| 152811/154911 [00:46<00:02, 934.69 examples/s]"
      ]
     },
     {
@@ -129158,8 +25197,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11412/12318 [19:46:04<1:34:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11412/12318 [19:46:04<1:34:09,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  99%|████▉| 153231/154911 [00:47<00:02, 688.85 examples/s]"
      ]
     },
     {
@@ -129167,8 +25205,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11413/12318 [19:46:11<1:34:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11413/12318 [19:46:11<1:34:03,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64):  99%|████▉| 153651/154911 [00:48<00:01, 773.85 examples/s]"
      ]
     },
     {
@@ -129176,8 +25213,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11414/12318 [19:46:13<1:33:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11414/12318 [19:46:13<1:33:57,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64): 100%|████▉| 154491/154911 [00:49<00:00, 678.14 examples/s]"
      ]
     },
     {
@@ -129185,8 +25221,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11415/12318 [19:46:16<1:33:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11415/12318 [19:46:16<1:33:50,  6.24s/it, v_num=e4xv, train/los"
+      "Filter (num_proc=64): 100%|█████| 154911/154911 [00:50<00:00, 771.54 examples/s]"
      ]
     },
     {
@@ -129194,8 +25229,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11416/12318 [19:46:21<1:33:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11416/12318 [19:46:21<1:33:44,  6.24s/it, v_num=e4xv, train/los"
+      "                                                                                \r"
      ]
     },
     {
@@ -129203,8 +25237,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11417/12318 [19:46:22<1:33:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11417/12318 [19:46:22<1:33:37,  6.23s/it, v_num=e4xv, train/los"
+      "Saving the dataset (0/8 shards):   0%|         | 0/98578 [00:00<?, ? examples/s]"
      ]
     },
     {
@@ -129212,8 +25245,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11418/12318 [19:46:30<1:33:31,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11418/12318 [19:46:30<1:33:31,  6.23s/it, v_num=e4xv, train/los"
+      "Saving the dataset (0/8 shards):   3%| | 3000/98578 [00:00<00:05, 18527.31 examp"
      ]
     },
     {
@@ -129221,8 +25253,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11419/12318 [19:46:36<1:33:25,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11419/12318 [19:46:36<1:33:25,  6.23s/it, v_num=e4xv, train/los"
+      "Saving the dataset (0/8 shards):   7%| | 7000/98578 [00:00<00:04, 20097.03 examp"
      ]
     },
     {
@@ -129230,8 +25261,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11420/12318 [19:46:44<1:33:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11420/12318 [19:46:44<1:33:19,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (0/8 shards):  11%| | 11000/98578 [00:00<00:04, 21169.98 exam"
      ]
     },
     {
@@ -129239,8 +25269,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11421/12318 [19:46:52<1:33:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11421/12318 [19:46:52<1:33:13,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (1/8 shards):  13%|▏| 12323/98578 [00:00<00:04, 21169.98 exam"
      ]
     },
     {
@@ -129248,8 +25277,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11422/12318 [19:46:55<1:33:06,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11422/12318 [19:46:55<1:33:06,  6.23s/it, v_num=e4xv, train/los"
+      "Saving the dataset (1/8 shards):  15%|▏| 14323/98578 [00:00<00:03, 21233.33 exam"
      ]
     },
     {
@@ -129257,8 +25285,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11423/12318 [19:47:03<1:33:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11423/12318 [19:47:03<1:33:00,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (1/8 shards):  19%|▏| 18323/98578 [00:00<00:03, 22135.19 exam"
      ]
     },
     {
@@ -129266,8 +25293,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11424/12318 [19:47:38<1:32:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11424/12318 [19:47:38<1:32:56,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (1/8 shards):  23%|▏| 22323/98578 [00:01<00:03, 22970.66 exam"
      ]
     },
     {
@@ -129275,8 +25301,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11425/12318 [19:47:40<1:32:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11425/12318 [19:47:40<1:32:49,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (2/8 shards):  25%|▎| 24646/98578 [00:01<00:03, 22970.66 exam"
      ]
     },
     {
@@ -129284,8 +25309,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11426/12318 [19:47:42<1:32:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11426/12318 [19:47:42<1:32:43,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (2/8 shards):  27%|▎| 26646/98578 [00:01<00:03, 23065.81 exam"
      ]
     },
     {
@@ -129293,8 +25317,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11427/12318 [19:47:46<1:32:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11427/12318 [19:47:46<1:32:36,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (2/8 shards):  31%|▎| 30646/98578 [00:01<00:02, 23753.83 exam"
      ]
     },
     {
@@ -129302,8 +25325,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11428/12318 [19:47:50<1:32:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11428/12318 [19:47:50<1:32:30,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (2/8 shards):  35%|▎| 34646/98578 [00:01<00:02, 24541.91 exam"
      ]
     },
     {
@@ -129311,8 +25333,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11429/12318 [19:47:55<1:32:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11429/12318 [19:47:55<1:32:24,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (3/8 shards):  38%|▍| 36968/98578 [00:01<00:02, 24541.91 exam"
      ]
     },
     {
@@ -129320,8 +25341,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11430/12318 [19:48:01<1:32:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11430/12318 [19:48:01<1:32:17,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (3/8 shards):  40%|▍| 38968/98578 [00:01<00:02, 24818.09 exam"
      ]
     },
     {
@@ -129329,8 +25349,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11431/12318 [19:48:07<1:32:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11431/12318 [19:48:07<1:32:11,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (3/8 shards):  44%|▍| 42968/98578 [00:01<00:02, 25747.69 exam"
      ]
     },
     {
@@ -129338,8 +25357,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11432/12318 [19:48:11<1:32:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11432/12318 [19:48:11<1:32:05,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (3/8 shards):  48%|▍| 46968/98578 [00:01<00:01, 26252.00 exam"
      ]
     },
     {
@@ -129347,8 +25365,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11433/12318 [19:48:16<1:31:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11433/12318 [19:48:16<1:31:58,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (4/8 shards):  50%|▌| 49290/98578 [00:02<00:01, 26252.00 exam"
      ]
     },
     {
@@ -129356,8 +25373,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11434/12318 [19:48:25<1:31:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11434/12318 [19:48:25<1:31:52,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (4/8 shards):  52%|▌| 51290/98578 [00:02<00:01, 25473.05 exam"
      ]
     },
     {
@@ -129365,8 +25381,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11435/12318 [19:48:34<1:31:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11435/12318 [19:48:34<1:31:46,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (4/8 shards):  56%|▌| 55290/98578 [00:02<00:01, 26262.70 exam"
      ]
     },
     {
@@ -129374,8 +25389,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11436/12318 [19:48:43<1:31:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11436/12318 [19:48:43<1:31:40,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (4/8 shards):  60%|▌| 59290/98578 [00:02<00:01, 26827.24 exam"
      ]
     },
     {
@@ -129383,8 +25397,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11437/12318 [19:48:50<1:31:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11437/12318 [19:48:50<1:31:34,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (5/8 shards):  63%|▋| 61612/98578 [00:02<00:01, 26827.24 exam"
      ]
     },
     {
@@ -129392,8 +25405,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11438/12318 [19:48:52<1:31:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11438/12318 [19:48:52<1:31:28,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (5/8 shards):  65%|▋| 63612/98578 [00:02<00:01, 26835.26 exam"
      ]
     },
     {
@@ -129401,8 +25413,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11439/12318 [19:48:58<1:31:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11439/12318 [19:48:58<1:31:21,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (5/8 shards):  69%|▋| 67612/98578 [00:02<00:01, 26690.16 exam"
      ]
     },
     {
@@ -129410,8 +25421,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11440/12318 [19:49:04<1:31:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11440/12318 [19:49:04<1:31:15,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (5/8 shards):  73%|▋| 71612/98578 [00:02<00:00, 27219.18 exam"
      ]
     },
     {
@@ -129419,8 +25429,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11441/12318 [19:49:12<1:31:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11441/12318 [19:49:12<1:31:09,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (6/8 shards):  75%|▊| 73934/98578 [00:02<00:00, 27219.18 exam"
      ]
     },
     {
@@ -129428,8 +25437,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11442/12318 [19:49:16<1:31:03,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11442/12318 [19:49:16<1:31:03,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (6/8 shards):  77%|▊| 75934/98578 [00:03<00:00, 27394.39 exam"
      ]
     },
     {
@@ -129437,8 +25445,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11443/12318 [19:49:17<1:30:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11443/12318 [19:49:17<1:30:56,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (6/8 shards):  80%|▊| 78934/98578 [00:03<00:00, 25879.88 exam"
      ]
     },
     {
@@ -129446,8 +25453,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11444/12318 [19:49:21<1:30:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11444/12318 [19:49:21<1:30:49,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (6/8 shards):  84%|▊| 82934/98578 [00:03<00:00, 26942.57 exam"
      ]
     },
     {
@@ -129455,8 +25461,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11445/12318 [19:49:28<1:30:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11445/12318 [19:49:28<1:30:43,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (6/8 shards):  88%|▉| 86256/98578 [00:03<00:00, 27554.84 exam\r",
+      "Saving the dataset (7/8 shards):  88%|▉| 86256/98578 [00:03<00:00, 27554.84 exam"
      ]
     },
     {
@@ -129464,8 +25470,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11446/12318 [19:49:37<1:30:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11446/12318 [19:49:37<1:30:37,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (7/8 shards):  92%|▉| 90256/98578 [00:03<00:00, 27517.24 exam"
      ]
     },
     {
@@ -129473,8 +25478,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11447/12318 [19:49:42<1:30:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11447/12318 [19:49:42<1:30:31,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (7/8 shards):  96%|▉| 94256/98578 [00:03<00:00, 28137.60 exam"
      ]
     },
     {
@@ -129482,8 +25486,9 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11448/12318 [19:49:51<1:30:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11448/12318 [19:49:51<1:30:25,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (7/8 shards): 100%|▉| 98256/98578 [00:03<00:00, 28717.59 exam\r",
+      "Saving the dataset (8/8 shards): 100%|█| 98578/98578 [00:03<00:00, 28717.59 exam\r",
+      "                                                                                \r"
      ]
     },
     {
@@ -129491,211 +25496,283 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11449/12318 [19:49:54<1:30:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11449/12318 [19:49:54<1:30:18,  6.24s/it, v_num=e4xv, train/los"
+      "Saving the dataset (0/1 shards):   0%|            | 0/99 [00:00<?, ? examples/s]\r",
+      "Saving the dataset (1/1 shards): 100%|█| 99/99 [00:00<00:00, 7058.12 examples/s]\r",
+      "                                                                                \r"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11450/12318 [19:49:59<1:30:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11450/12318 [19:49:59<1:30:12,  6.24s/it, v_num=e4xv, train/los"
+      "[rank: 0] Global seed set to 4016710040\r\n",
+      "initializing deepspeed distributed: GLOBAL_RANK: 0, MEMBER: 1/8\r\n",
+      "[2023-09-02 06:23:32,173] [WARNING] [comm.py:152:init_deepspeed_backend] NCCL backend in DeepSpeed not yet implemented\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11451/12318 [19:50:07<1:30:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11451/12318 [19:50:07<1:30:06,  6.24s/it, v_num=e4xv, train/los"
+      "Enabling DeepSpeed BF16.\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11452/12318 [19:50:11<1:30:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11452/12318 [19:50:11<1:30:00,  6.24s/it, v_num=e4xv, train/los"
+      "LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]\r\n",
+      "#\r\n",
+      "# RWKV lighting_trainer.py important notes \r\n",
+      "# https://github.com/RWKV/RWKV-infctx-trainer \r\n",
+      "#\r\n",
+      "# - Ensure your host is not running cuda 12.0 (use either 11.8, or >=12.1), as this is known to have freeze issues\r\n",
+      "# - The terms used in wandb / the progress bar can be confusing, see the github README.md for beter clarifications\r\n",
+      "# - When resuming from checkpoint, the estimated time is inaccurate\r\n",
+      "#\r\n",
+      "\r\n",
+      "[RWKV.model] Configuring optimizer with\r\n",
+      "    - lr_init:  3.000e-04 (0.0003)\r\n",
+      "    - lr_final: 1.000e-04 (0.0001)\r\n",
+      "\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11453/12318 [19:50:12<1:29:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11453/12318 [19:50:12<1:29:53,  6.24s/it, v_num=e4xv, train/los"
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Detected CUDA files, patching ldflags\r\n",
+      "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/fused_adam/build.ninja...\r\n",
+      "Building extension module fused_adam...\r\n",
+      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11454/12318 [19:50:15<1:29:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11454/12318 [19:50:15<1:29:47,  6.24s/it, v_num=e4xv, train/los"
+      "ninja: no work to do.\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.06581354141235352 seconds\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11455/12318 [19:50:21<1:29:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11455/12318 [19:50:21<1:29:40,  6.23s/it, v_num=e4xv, train/los"
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.10160326957702637 seconds\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.10122323036193848 seconds\r\n",
+      "Time to load fused_adam op: 0.10131621360778809 seconds\r\n",
+      "Time to load fused_adam op: 0.10161590576171875 seconds\r\n",
+      "Time to load fused_adam op: 0.10172796249389648 seconds\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.10164880752563477 seconds\r\n",
+      "Loading `train_dataloader` to estimate number of stepping batches.\r\n",
+      "Loading extension module fused_adam...\r\n",
+      "Time to load fused_adam op: 0.10137581825256348 seconds\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11456/12318 [19:51:00<1:29:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11456/12318 [19:51:00<1:29:36,  6.24s/it, v_num=e4xv, train/los"
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11457/12318 [19:51:05<1:29:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11457/12318 [19:51:05<1:29:30,  6.24s/it, v_num=e4xv, train/los"
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Emitting ninja build file /root/.cache/torch_extensions/py311_cu118/utils/build.ninja...\r\n",
+      "Building extension module utils...\r\n",
+      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11458/12318 [19:51:07<1:29:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11458/12318 [19:51:07<1:29:24,  6.24s/it, v_num=e4xv, train/los"
+      "ninja: no work to do.\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0746297836303711 seconds\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.10225343704223633 seconds\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11459/12318 [19:51:09<1:29:17,  6.24s/it, v_num=e4xv, train/los"
+      "Loading extension module utils...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.1022336483001709 seconds\r\n",
+      "Time to load utils op: 0.10247349739074707 seconds\r\n",
+      "Loading extension module utils...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.1032721996307373 seconds\r\n",
+      "Time to load utils op: 0.10237407684326172 seconds\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.10254526138305664 seconds\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.10282373428344727 seconds\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11459/12318 [19:51:09<1:29:17,  6.24s/it, v_num=e4xv, train/los"
+      "Rank: 5 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11460/12318 [19:51:10<1:29:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11460/12318 [19:51:10<1:29:10,  6.24s/it, v_num=e4xv, train/los"
+      "Rank: 1 partition count [8, 8] and sizes[(176584448, False), (384, False)] "
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11461/12318 [19:51:14<1:29:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11461/12318 [19:51:14<1:29:04,  6.24s/it, v_num=e4xv, train/los"
+      "\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11462/12318 [19:51:20<1:28:58,  6.24s/it, v_num=e4xv, train/los"
+      "Rank: 2 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11462/12318 [19:51:20<1:28:58,  6.24s/it, v_num=e4xv, train/los"
+      "Rank: 4 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11463/12318 [19:51:29<1:28:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11463/12318 [19:51:29<1:28:52,  6.24s/it, v_num=e4xv, train/los"
+      "Rank: 7 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11464/12318 [19:51:37<1:28:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11464/12318 [19:51:37<1:28:46,  6.24s/it, v_num=e4xv, train/los"
+      "Rank: 0 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11465/12318 [19:51:43<1:28:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11465/12318 [19:51:43<1:28:39,  6.24s/it, v_num=e4xv, train/los"
+      "Rank: 3 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11466/12318 [19:51:51<1:28:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11466/12318 [19:51:51<1:28:33,  6.24s/it, v_num=e4xv, train/los"
+      "Rank: 6 partition count [8, 8] and sizes[(176584448, False), (384, False)] \r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11467/12318 [19:51:59<1:28:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11467/12318 [19:51:59<1:28:27,  6.24s/it, v_num=e4xv, train/los"
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006437301635742188 seconds\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0009703636169433594 seconds\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006546974182128906 seconds\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006368160247802734 seconds\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006356239318847656 seconds\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11468/12318 [19:52:00<1:28:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11468/12318 [19:52:00<1:28:21,  6.24s/it, v_num=e4xv, train/los"
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0006177425384521484 seconds\r\n",
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0008919239044189453 seconds\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11469/12318 [19:52:06<1:28:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11469/12318 [19:52:06<1:28:14,  6.24s/it, v_num=e4xv, train/los"
+      "Using /root/.cache/torch_extensions/py311_cu118 as PyTorch extensions root...\r\n",
+      "No modifications detected for re-loaded extension module utils, skipping build step...\r\n",
+      "Loading extension module utils...\r\n",
+      "Time to load utils op: 0.0009381771087646484 seconds\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  93%|▉| 11470/12318 [19:52:09<1:28:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11470/12318 [19:52:09<1:28:08,  6.24s/it, v_num=e4xv, train/los"
+      "\r\n",
+      "  | Name   | Type       | Params\r\n",
+      "--------------------------------------\r\n",
+      "0 | emb    | Embedding  | 51.5 M\r\n",
+      "1 | blocks | ModuleList | 1.3 B \r\n",
+      "2 | ln_out | LayerNorm  | 2.0 K \r\n",
+      "3 | head   | Linear     | 51.5 M\r\n",
+      "--------------------------------------\r\n",
+      "1.4 B     Trainable params\r\n",
+      "0         Non-trainable params\r\n",
+      "1.4 B     Total params\r\n",
+      "5,650.715 Total estimated model params size (MB)\r\n"
      ]
     },
     {
@@ -129703,8 +25780,9 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11471/12318 [19:52:16<1:28:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11471/12318 [19:52:16<1:28:02,  6.24s/it, v_num=e4xv, train/los"
+      "Training: 0it [00:00, ?it/s]\r",
+      "Training:   0%|                                       | 0/12323 [00:00<?, ?it/s]\r",
+      "Epoch 0:   0%|                                        | 0/12323 [00:00<?, ?it/s]"
      ]
     },
     {
@@ -129712,8 +25790,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11472/12318 [19:52:21<1:27:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11472/12318 [19:52:21<1:27:55,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%|                             | 1/12323 [00:15<54:17:40, 15.86s/it]\r",
+      "Epoch 0:   0%| | 1/12323 [00:15<54:17:57, 15.86s/it, v_num=i2o7, train/loss=0.50"
      ]
     },
     {
@@ -129721,8 +25799,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11473/12318 [19:52:25<1:27:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11473/12318 [19:52:25<1:27:49,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 2/12323 [00:19<34:07:52,  9.97s/it, v_num=i2o7, train/loss=0.50\r",
+      "Epoch 0:   0%| | 2/12323 [00:19<34:07:55,  9.97s/it, v_num=i2o7, train/loss=0.12"
      ]
     },
     {
@@ -129730,7 +25808,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11474/12318 [19:52:34<1:27:43,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 3/12323 [00:23<26:51:33,  7.85s/it, v_num=i2o7, train/loss=0.12\r",
+      "Epoch 0:   0%| | 3/12323 [00:23<26:51:35,  7.85s/it, v_num=i2o7, train/loss=0.10"
      ]
     },
     {
@@ -129738,7 +25817,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11474/12318 [19:52:34<1:27:43,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 4/12323 [00:32<28:02:51,  8.20s/it, v_num=i2o7, train/loss=0.10\r",
+      "Epoch 0:   0%| | 4/12323 [00:32<28:02:54,  8.20s/it, v_num=i2o7, train/loss=8.25"
      ]
     },
     {
@@ -129746,8 +25826,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11475/12318 [19:52:40<1:27:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11475/12318 [19:52:40<1:27:37,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 5/12323 [00:37<25:55:19,  7.58s/it, v_num=i2o7, train/loss=8.25\r",
+      "Epoch 0:   0%| | 5/12323 [00:37<25:55:21,  7.58s/it, v_num=i2o7, train/loss=0.53"
      ]
     },
     {
@@ -129755,8 +25835,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11476/12318 [19:52:49<1:27:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11476/12318 [19:52:49<1:27:31,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 6/12323 [00:41<23:46:16,  6.95s/it, v_num=i2o7, train/loss=0.53\r",
+      "Epoch 0:   0%| | 6/12323 [00:41<23:46:18,  6.95s/it, v_num=i2o7, train/loss=0.24"
      ]
     },
     {
@@ -129764,8 +25844,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11477/12318 [19:52:51<1:27:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11477/12318 [19:52:51<1:27:24,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 7/12323 [00:43<21:29:50,  6.28s/it, v_num=i2o7, train/loss=0.24\r",
+      "Epoch 0:   0%| | 7/12323 [00:43<21:29:51,  6.28s/it, v_num=i2o7, train/loss=0.09"
      ]
     },
     {
@@ -129773,7 +25853,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11478/12318 [19:52:58<1:27:18,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 8/12323 [00:50<21:33:12,  6.30s/it, v_num=i2o7, train/loss=0.09\r",
+      "Epoch 0:   0%| | 8/12323 [00:50<21:33:13,  6.30s/it, v_num=i2o7, train/loss=5.22"
      ]
     },
     {
@@ -129781,7 +25862,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11478/12318 [19:52:58<1:27:18,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 9/12323 [00:57<21:57:52,  6.42s/it, v_num=i2o7, train/loss=5.22\r",
+      "Epoch 0:   0%| | 9/12323 [00:57<21:57:53,  6.42s/it, v_num=i2o7, train/loss=6.00"
      ]
     },
     {
@@ -129789,8 +25871,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11479/12318 [19:53:04<1:27:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11479/12318 [19:53:04<1:27:12,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 10/12323 [01:05<22:17:54,  6.52s/it, v_num=i2o7, train/loss=6.0\r",
+      "Epoch 0:   0%| | 10/12323 [01:05<22:17:56,  6.52s/it, v_num=i2o7, train/loss=6.7"
      ]
     },
     {
@@ -129798,8 +25880,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11480/12318 [19:53:11<1:27:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11480/12318 [19:53:11<1:27:05,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 11/12323 [01:09<21:45:45,  6.36s/it, v_num=i2o7, train/loss=6.7\r",
+      "Epoch 0:   0%| | 11/12323 [01:09<21:45:46,  6.36s/it, v_num=i2o7, train/loss=0.6"
      ]
     },
     {
@@ -129807,8 +25889,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11481/12318 [19:53:16<1:26:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11481/12318 [19:53:16<1:26:59,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 12/12323 [01:11<20:18:23,  5.94s/it, v_num=i2o7, train/loss=0.6\r",
+      "Epoch 0:   0%| | 12/12323 [01:11<20:18:24,  5.94s/it, v_num=i2o7, train/loss=0.0"
      ]
     },
     {
@@ -129816,8 +25898,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11482/12318 [19:53:23<1:26:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11482/12318 [19:53:23<1:26:53,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 13/12323 [01:17<20:24:40,  5.97s/it, v_num=i2o7, train/loss=0.0\r",
+      "Epoch 0:   0%| | 13/12323 [01:17<20:24:41,  5.97s/it, v_num=i2o7, train/loss=5.6"
      ]
     },
     {
@@ -129825,8 +25907,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11483/12318 [19:53:32<1:26:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11483/12318 [19:53:32<1:26:47,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 14/12323 [01:25<20:59:28,  6.14s/it, v_num=i2o7, train/loss=5.6\r",
+      "Epoch 0:   0%| | 14/12323 [01:25<20:59:29,  6.14s/it, v_num=i2o7, train/loss=8.1"
      ]
     },
     {
@@ -129834,8 +25916,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11484/12318 [19:53:38<1:26:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11484/12318 [19:53:38<1:26:41,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 15/12323 [01:31<20:49:13,  6.09s/it, v_num=i2o7, train/loss=8.1\r",
+      "Epoch 0:   0%| | 15/12323 [01:31<20:49:14,  6.09s/it, v_num=i2o7, train/loss=1.2"
      ]
     },
     {
@@ -129843,8 +25925,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11485/12318 [19:53:42<1:26:34,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11485/12318 [19:53:42<1:26:34,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 16/12323 [01:36<20:40:32,  6.05s/it, v_num=i2o7, train/loss=1.2\r",
+      "Epoch 0:   0%| | 16/12323 [01:36<20:40:33,  6.05s/it, v_num=i2o7, train/loss=1.7"
      ]
     },
     {
@@ -129852,8 +25934,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11486/12318 [19:53:44<1:26:28,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11486/12318 [19:53:44<1:26:28,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 17/12323 [01:38<19:51:29,  5.81s/it, v_num=i2o7, train/loss=1.7\r",
+      "Epoch 0:   0%| | 17/12323 [01:38<19:51:29,  5.81s/it, v_num=i2o7, train/loss=0.0"
      ]
     },
     {
@@ -129861,8 +25943,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11487/12318 [19:53:52<1:26:22,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11487/12318 [19:53:52<1:26:22,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 18/12323 [01:41<19:16:26,  5.64s/it, v_num=i2o7, train/loss=0.0\r",
+      "Epoch 0:   0%| | 18/12323 [01:41<19:16:26,  5.64s/it, v_num=i2o7, train/loss=0.1"
      ]
     },
     {
@@ -129870,8 +25952,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11488/12318 [19:54:09<1:26:16,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11488/12318 [19:54:09<1:26:16,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 19/12323 [01:44<18:50:18,  5.51s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 19/12323 [01:44<18:50:18,  5.51s/it, v_num=i2o7, train/loss=0.1"
      ]
     },
     {
@@ -129879,8 +25961,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11489/12318 [19:54:15<1:26:10,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11489/12318 [19:54:15<1:26:10,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 20/12323 [01:53<19:19:22,  5.65s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 20/12323 [01:53<19:19:22,  5.65s/it, v_num=i2o7, train/loss=8.1"
      ]
     },
     {
@@ -129888,8 +25970,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11490/12318 [19:54:22<1:26:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11490/12318 [19:54:22<1:26:04,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 21/12323 [01:55<18:48:28,  5.50s/it, v_num=i2o7, train/loss=8.1\r",
+      "Epoch 0:   0%| | 21/12323 [01:55<18:48:29,  5.50s/it, v_num=i2o7, train/loss=0.1"
      ]
     },
     {
@@ -129897,8 +25979,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11491/12318 [19:54:26<1:25:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11491/12318 [19:54:26<1:25:57,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 22/12323 [01:58<18:27:39,  5.40s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 22/12323 [01:58<18:27:39,  5.40s/it, v_num=i2o7, train/loss=0.1"
      ]
     },
     {
@@ -129906,8 +25988,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11492/12318 [19:54:32<1:25:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11492/12318 [19:54:32<1:25:51,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 23/12323 [02:03<18:17:47,  5.36s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 23/12323 [02:03<18:17:47,  5.36s/it, v_num=i2o7, train/loss=0.3"
      ]
     },
     {
@@ -129915,8 +25997,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11493/12318 [19:54:34<1:25:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11493/12318 [19:54:34<1:25:45,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 24/12323 [02:07<18:08:10,  5.31s/it, v_num=i2o7, train/loss=0.3\r",
+      "Epoch 0:   0%| | 24/12323 [02:07<18:08:10,  5.31s/it, v_num=i2o7, train/loss=0.2"
      ]
     },
     {
@@ -129924,8 +26006,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11494/12318 [19:54:41<1:25:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11494/12318 [19:54:41<1:25:38,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 25/12323 [02:11<17:59:05,  5.26s/it, v_num=i2o7, train/loss=0.2\r",
+      "Epoch 0:   0%| | 25/12323 [02:11<17:59:05,  5.26s/it, v_num=i2o7, train/loss=0.3"
      ]
     },
     {
@@ -129933,8 +26015,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11495/12318 [19:54:46<1:25:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11495/12318 [19:54:46<1:25:32,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 26/12323 [02:14<17:42:35,  5.18s/it, v_num=i2o7, train/loss=0.3\r",
+      "Epoch 0:   0%| | 26/12323 [02:14<17:42:36,  5.18s/it, v_num=i2o7, train/loss=0.1"
      ]
     },
     {
@@ -129942,8 +26024,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11496/12318 [19:54:55<1:25:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11496/12318 [19:54:55<1:25:26,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 27/12323 [02:21<17:57:34,  5.26s/it, v_num=i2o7, train/loss=0.1\r",
+      "Epoch 0:   0%| | 27/12323 [02:21<17:57:35,  5.26s/it, v_num=i2o7, train/loss=7.1"
      ]
     },
     {
@@ -129951,8 +26033,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11497/12318 [19:54:58<1:25:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11497/12318 [19:54:58<1:25:20,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 28/12323 [02:24<17:35:13,  5.15s/it, v_num=i2o7, train/loss=7.1\r",
+      "Epoch 0:   0%| | 28/12323 [02:24<17:35:13,  5.15s/it, v_num=i2o7, train/loss=0.0"
      ]
     },
     {
@@ -129960,8 +26042,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11498/12318 [19:55:04<1:25:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11498/12318 [19:55:04<1:25:13,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 29/12323 [02:28<17:31:47,  5.13s/it, v_num=i2o7, train/loss=0.0\r",
+      "Epoch 0:   0%| | 29/12323 [02:28<17:31:47,  5.13s/it, v_num=i2o7, train/loss=0.9"
      ]
     },
     {
@@ -129969,8 +26051,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11499/12318 [19:55:13<1:25:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11499/12318 [19:55:13<1:25:07,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 30/12323 [02:35<17:39:22,  5.17s/it, v_num=i2o7, train/loss=0.9\r",
+      "Epoch 0:   0%| | 30/12323 [02:35<17:39:22,  5.17s/it, v_num=i2o7, train/loss=5.6"
      ]
     },
     {
@@ -129978,8 +26060,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11500/12318 [19:55:21<1:25:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11500/12318 [19:55:21<1:25:01,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 31/12323 [02:40<17:40:10,  5.17s/it, v_num=i2o7, train/loss=5.6\r",
+      "Epoch 0:   0%| | 31/12323 [02:40<17:40:10,  5.17s/it, v_num=i2o7, train/loss=1.8"
      ]
     },
     {
@@ -129987,8 +26069,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11501/12318 [19:55:26<1:24:55,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11501/12318 [19:55:26<1:24:55,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 32/12323 [03:21<21:27:07,  6.28s/it, v_num=i2o7, train/loss=1.8"
      ]
     },
     {
@@ -129996,8 +26077,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11502/12318 [19:55:29<1:24:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11502/12318 [19:55:29<1:24:48,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 32/12323 [03:23<21:45:52,  6.37s/it, v_num=i2o7, train/loss=4.7"
      ]
     },
     {
@@ -130005,8 +26085,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11503/12318 [19:55:32<1:24:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11503/12318 [19:55:32<1:24:42,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 33/12323 [03:27<21:30:07,  6.30s/it, v_num=i2o7, train/loss=4.7\r",
+      "Epoch 0:   0%| | 33/12323 [03:27<21:30:07,  6.30s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130014,8 +26094,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11504/12318 [19:55:41<1:24:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11504/12318 [19:55:41<1:24:36,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 34/12323 [03:29<20:59:29,  6.15s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 34/12323 [03:29<20:59:29,  6.15s/it, v_num=i2o7, train/loss=8.8"
      ]
     },
     {
@@ -130023,8 +26103,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11505/12318 [19:55:47<1:24:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11505/12318 [19:55:47<1:24:30,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 35/12323 [03:30<20:30:35,  6.01s/it, v_num=i2o7, train/loss=8.8\r",
+      "Epoch 0:   0%| | 35/12323 [03:30<20:30:35,  6.01s/it, v_num=i2o7, train/loss=6.6"
      ]
     },
     {
@@ -130032,8 +26112,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11506/12318 [19:55:55<1:24:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11506/12318 [19:55:55<1:24:23,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 36/12323 [03:38<20:42:57,  6.07s/it, v_num=i2o7, train/loss=6.6\r",
+      "Epoch 0:   0%| | 36/12323 [03:38<20:42:57,  6.07s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
@@ -130041,8 +26121,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11507/12318 [19:56:00<1:24:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11507/12318 [19:56:00<1:24:17,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 37/12323 [03:44<20:43:49,  6.07s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   0%| | 37/12323 [03:44<20:43:49,  6.07s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130050,8 +26130,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11508/12318 [19:56:01<1:24:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11508/12318 [19:56:01<1:24:11,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 38/12323 [03:51<20:49:55,  6.10s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 38/12323 [03:51<20:49:55,  6.10s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
@@ -130059,8 +26139,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11509/12318 [19:56:05<1:24:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11509/12318 [19:56:05<1:24:04,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 39/12323 [03:54<20:29:47,  6.01s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   0%| | 39/12323 [03:54<20:29:48,  6.01s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130068,8 +26148,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11510/12318 [19:56:07<1:23:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11510/12318 [19:56:07<1:23:58,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 40/12323 [03:58<20:20:40,  5.96s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 40/12323 [03:58<20:20:40,  5.96s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130077,8 +26157,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11511/12318 [19:56:13<1:23:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11511/12318 [19:56:13<1:23:51,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 41/12323 [04:03<20:16:55,  5.94s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 41/12323 [04:03<20:16:55,  5.94s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130086,8 +26166,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11512/12318 [19:56:20<1:23:45,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11512/12318 [19:56:20<1:23:45,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 42/12323 [04:07<20:06:08,  5.89s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 42/12323 [04:07<20:06:08,  5.89s/it, v_num=i2o7, train/loss=12."
      ]
     },
     {
@@ -130095,8 +26175,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11513/12318 [19:56:26<1:23:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11513/12318 [19:56:26<1:23:39,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 43/12323 [04:12<20:00:16,  5.86s/it, v_num=i2o7, train/loss=12.\r",
+      "Epoch 0:   0%| | 43/12323 [04:12<20:00:16,  5.86s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130104,8 +26184,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11514/12318 [19:56:35<1:23:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11514/12318 [19:56:35<1:23:33,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 44/12323 [04:14<19:43:10,  5.78s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 44/12323 [04:14<19:43:10,  5.78s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130113,8 +26193,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11515/12318 [19:56:43<1:23:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11515/12318 [19:56:43<1:23:27,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 45/12323 [04:21<19:49:25,  5.81s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 45/12323 [04:21<19:49:25,  5.81s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130122,8 +26202,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11516/12318 [19:56:44<1:23:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11516/12318 [19:56:44<1:23:20,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 46/12323 [04:27<19:51:16,  5.82s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 46/12323 [04:27<19:51:16,  5.82s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130131,8 +26211,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  93%|▉| 11517/12318 [19:56:51<1:23:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  93%|▉| 11517/12318 [19:56:51<1:23:14,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 47/12323 [04:32<19:48:18,  5.81s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 47/12323 [04:32<19:48:18,  5.81s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130140,8 +26220,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11518/12318 [19:56:56<1:23:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11518/12318 [19:56:56<1:23:08,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 48/12323 [04:41<19:58:01,  5.86s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 48/12323 [04:41<19:58:01,  5.86s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
@@ -130149,8 +26229,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11519/12318 [19:57:03<1:23:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11519/12318 [19:57:03<1:23:01,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 49/12323 [04:45<19:51:03,  5.82s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   0%| | 49/12323 [04:45<19:51:03,  5.82s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130158,8 +26238,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11520/12318 [19:57:17<1:22:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11520/12318 [19:57:17<1:22:56,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 50/12323 [04:48<19:38:26,  5.76s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 50/12323 [04:48<19:38:26,  5.76s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130167,8 +26247,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11521/12318 [19:57:24<1:22:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11521/12318 [19:57:24<1:22:50,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 51/12323 [04:49<19:22:20,  5.68s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 51/12323 [04:49<19:22:20,  5.68s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130176,8 +26256,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11522/12318 [19:57:28<1:22:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11522/12318 [19:57:28<1:22:43,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 52/12323 [04:54<19:16:46,  5.66s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 52/12323 [04:54<19:16:46,  5.66s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130185,8 +26265,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11523/12318 [19:57:30<1:22:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11523/12318 [19:57:30<1:22:37,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 53/12323 [04:59<19:15:27,  5.65s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 53/12323 [04:59<19:15:27,  5.65s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130194,8 +26274,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11524/12318 [19:57:39<1:22:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11524/12318 [19:57:39<1:22:31,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 54/12323 [05:02<19:06:28,  5.61s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 54/12323 [05:02<19:06:28,  5.61s/it, v_num=i2o7, train/loss=12."
      ]
     },
     {
@@ -130203,8 +26283,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11525/12318 [19:57:47<1:22:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11525/12318 [19:57:47<1:22:24,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 55/12323 [05:04<18:50:12,  5.53s/it, v_num=i2o7, train/loss=12.\r",
+      "Epoch 0:   0%| | 55/12323 [05:04<18:50:12,  5.53s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
@@ -130212,8 +26292,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11526/12318 [19:57:50<1:22:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11526/12318 [19:57:50<1:22:18,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 56/12323 [05:11<18:56:25,  5.56s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   0%| | 56/12323 [05:11<18:56:25,  5.56s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130221,8 +26301,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11527/12318 [19:57:58<1:22:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11527/12318 [19:57:58<1:22:12,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 57/12323 [05:14<18:48:05,  5.52s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 57/12323 [05:14<18:48:05,  5.52s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130230,7 +26310,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11528/12318 [19:58:04<1:22:06,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 58/12323 [05:19<18:47:47,  5.52s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 58/12323 [05:19<18:47:47,  5.52s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130238,7 +26319,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11528/12318 [19:58:04<1:22:06,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 59/12323 [05:26<18:50:49,  5.53s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 59/12323 [05:26<18:50:49,  5.53s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130246,8 +26328,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11529/12318 [19:58:12<1:22:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11529/12318 [19:58:12<1:22:00,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 60/12323 [05:28<18:39:44,  5.48s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 60/12323 [05:28<18:39:44,  5.48s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130255,8 +26337,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11530/12318 [19:58:13<1:21:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11530/12318 [19:58:13<1:21:53,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   0%| | 61/12323 [05:34<18:39:19,  5.48s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   0%| | 61/12323 [05:34<18:39:19,  5.48s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130264,8 +26346,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11531/12318 [19:58:16<1:21:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11531/12318 [19:58:16<1:21:47,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 62/12323 [05:35<18:27:05,  5.42s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   1%| | 62/12323 [05:35<18:27:05,  5.42s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
@@ -130273,8 +26355,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11532/12318 [19:58:21<1:21:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11532/12318 [19:58:21<1:21:40,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 63/12323 [05:40<18:25:10,  5.41s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   1%| | 63/12323 [05:40<18:25:10,  5.41s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130282,8 +26364,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11533/12318 [19:58:25<1:21:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11533/12318 [19:58:25<1:21:34,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 64/12323 [06:37<21:09:47,  6.21s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130291,8 +26372,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11534/12318 [19:58:28<1:21:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11534/12318 [19:58:28<1:21:27,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 64/12323 [06:40<21:19:10,  6.26s/it, v_num=i2o7, train/loss=11."
      ]
     },
     {
@@ -130300,8 +26380,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11535/12318 [19:58:35<1:21:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11535/12318 [19:58:35<1:21:21,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 65/12323 [06:47<21:19:32,  6.26s/it, v_num=i2o7, train/loss=11.\r",
+      "Epoch 0:   1%| | 65/12323 [06:47<21:19:32,  6.26s/it, v_num=i2o7, train/loss=9.1"
      ]
     },
     {
@@ -130309,8 +26389,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11536/12318 [19:58:38<1:21:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11536/12318 [19:58:38<1:21:15,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 66/12323 [06:50<21:11:53,  6.23s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 66/12323 [06:50<21:11:53,  6.23s/it, v_num=i2o7, train/loss=9.6"
      ]
     },
     {
@@ -130318,8 +26398,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11537/12318 [19:58:40<1:21:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11537/12318 [19:58:40<1:21:08,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 67/12323 [06:59<21:18:30,  6.26s/it, v_num=i2o7, train/loss=9.6\r",
+      "Epoch 0:   1%| | 67/12323 [06:59<21:18:30,  6.26s/it, v_num=i2o7, train/loss=9.0"
      ]
     },
     {
@@ -130327,8 +26407,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11538/12318 [19:58:49<1:21:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11538/12318 [19:58:49<1:21:02,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 68/12323 [07:01<21:06:36,  6.20s/it, v_num=i2o7, train/loss=9.0\r",
+      "Epoch 0:   1%| | 68/12323 [07:01<21:06:36,  6.20s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
@@ -130336,8 +26416,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11539/12318 [19:58:52<1:20:56,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11539/12318 [19:58:52<1:20:56,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 69/12323 [07:05<20:57:59,  6.16s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   1%| | 69/12323 [07:05<20:58:00,  6.16s/it, v_num=i2o7, train/loss=9.8"
      ]
     },
     {
@@ -130345,8 +26425,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11540/12318 [19:58:56<1:20:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11540/12318 [19:58:56<1:20:49,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 70/12323 [07:08<20:51:12,  6.13s/it, v_num=i2o7, train/loss=9.8\r",
+      "Epoch 0:   1%| | 70/12323 [07:08<20:51:12,  6.13s/it, v_num=i2o7, train/loss=9.5"
      ]
     },
     {
@@ -130354,8 +26434,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11541/12318 [19:59:04<1:20:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11541/12318 [19:59:04<1:20:43,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 71/12323 [07:16<20:55:08,  6.15s/it, v_num=i2o7, train/loss=9.5\r",
+      "Epoch 0:   1%| | 71/12323 [07:16<20:55:09,  6.15s/it, v_num=i2o7, train/loss=9.0"
      ]
     },
     {
@@ -130363,8 +26443,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11542/12318 [19:59:09<1:20:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11542/12318 [19:59:09<1:20:37,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 72/12323 [07:21<20:51:28,  6.13s/it, v_num=i2o7, train/loss=9.0\r",
+      "Epoch 0:   1%| | 72/12323 [07:21<20:51:28,  6.13s/it, v_num=i2o7, train/loss=9.5"
      ]
     },
     {
@@ -130372,7 +26452,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11543/12318 [19:59:14<1:20:31,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 73/12323 [07:28<20:55:02,  6.15s/it, v_num=i2o7, train/loss=9.5\r",
+      "Epoch 0:   1%| | 73/12323 [07:28<20:55:02,  6.15s/it, v_num=i2o7, train/loss=9.1"
      ]
     },
     {
@@ -130380,7 +26461,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11543/12318 [19:59:14<1:20:31,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 74/12323 [07:36<20:58:28,  6.16s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 74/12323 [07:36<20:58:28,  6.16s/it, v_num=i2o7, train/loss=9.1"
      ]
     },
     {
@@ -130388,8 +26470,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11544/12318 [19:59:19<1:20:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11544/12318 [19:59:19<1:20:24,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 75/12323 [07:38<20:46:34,  6.11s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 75/12323 [07:38<20:46:34,  6.11s/it, v_num=i2o7, train/loss=0.2"
      ]
     },
     {
@@ -130397,8 +26479,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11545/12318 [19:59:25<1:20:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11545/12318 [19:59:25<1:20:18,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 76/12323 [07:42<20:41:25,  6.08s/it, v_num=i2o7, train/loss=0.2\r",
+      "Epoch 0:   1%| | 76/12323 [07:42<20:41:25,  6.08s/it, v_num=i2o7, train/loss=9.4"
      ]
     },
     {
@@ -130406,8 +26488,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11546/12318 [19:59:30<1:20:12,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11546/12318 [19:59:30<1:20:12,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 77/12323 [07:47<20:38:58,  6.07s/it, v_num=i2o7, train/loss=9.4\r",
+      "Epoch 0:   1%| | 77/12323 [07:47<20:38:58,  6.07s/it, v_num=i2o7, train/loss=9.3"
      ]
     },
     {
@@ -130415,8 +26497,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11547/12318 [19:59:36<1:20:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11547/12318 [19:59:36<1:20:05,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 78/12323 [07:50<20:31:25,  6.03s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 78/12323 [07:50<20:31:25,  6.03s/it, v_num=i2o7, train/loss=9.9"
      ]
     },
     {
@@ -130424,8 +26506,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11548/12318 [19:59:40<1:19:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11548/12318 [19:59:40<1:19:59,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 79/12323 [07:55<20:27:49,  6.02s/it, v_num=i2o7, train/loss=9.9\r",
+      "Epoch 0:   1%| | 79/12323 [07:55<20:27:49,  6.02s/it, v_num=i2o7, train/loss=9.4"
      ]
     },
     {
@@ -130433,8 +26515,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11549/12318 [19:59:48<1:19:53,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11549/12318 [19:59:48<1:19:53,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 80/12323 [08:00<20:25:48,  6.01s/it, v_num=i2o7, train/loss=9.4\r",
+      "Epoch 0:   1%| | 80/12323 [08:00<20:25:48,  6.01s/it, v_num=i2o7, train/loss=9.1"
      ]
     },
     {
@@ -130442,8 +26524,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11550/12318 [19:59:53<1:19:47,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11550/12318 [19:59:53<1:19:47,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 81/12323 [08:02<20:16:06,  5.96s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 81/12323 [08:02<20:16:06,  5.96s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
@@ -130451,8 +26533,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11551/12318 [19:59:58<1:19:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11551/12318 [19:59:58<1:19:40,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 82/12323 [08:11<20:21:40,  5.99s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   1%| | 82/12323 [08:11<20:21:40,  5.99s/it, v_num=i2o7, train/loss=9.0"
      ]
     },
     {
@@ -130460,8 +26542,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11552/12318 [20:00:38<1:19:36,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11552/12318 [20:00:38<1:19:36,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 83/12323 [08:15<20:17:08,  5.97s/it, v_num=i2o7, train/loss=9.0\r",
+      "Epoch 0:   1%| | 83/12323 [08:15<20:17:09,  5.97s/it, v_num=i2o7, train/loss=9.5"
      ]
     },
     {
@@ -130469,8 +26551,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11553/12318 [20:00:47<1:19:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11553/12318 [20:00:47<1:19:30,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 84/12323 [08:17<20:07:57,  5.92s/it, v_num=i2o7, train/loss=9.5\r",
+      "Epoch 0:   1%| | 84/12323 [08:17<20:07:58,  5.92s/it, v_num=i2o7, train/loss=10."
      ]
     },
     {
@@ -130478,8 +26560,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11554/12318 [20:00:49<1:19:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11554/12318 [20:00:49<1:19:24,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 85/12323 [08:24<20:11:13,  5.94s/it, v_num=i2o7, train/loss=10.\r",
+      "Epoch 0:   1%| | 85/12323 [08:24<20:11:13,  5.94s/it, v_num=i2o7, train/loss=9.0"
      ]
     },
     {
@@ -130487,8 +26569,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11555/12318 [20:00:54<1:19:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11555/12318 [20:00:54<1:19:17,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 86/12323 [08:29<20:08:20,  5.92s/it, v_num=i2o7, train/loss=9.0\r",
+      "Epoch 0:   1%| | 86/12323 [08:29<20:08:20,  5.92s/it, v_num=i2o7, train/loss=9.3"
      ]
     },
     {
@@ -130496,8 +26578,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11556/12318 [20:00:56<1:19:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11556/12318 [20:00:56<1:19:11,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 87/12323 [08:34<20:06:37,  5.92s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 87/12323 [08:34<20:06:37,  5.92s/it, v_num=i2o7, train/loss=9.3"
      ]
     },
     {
@@ -130505,8 +26587,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11557/12318 [20:01:05<1:19:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11557/12318 [20:01:05<1:19:05,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 88/12323 [08:38<20:01:37,  5.89s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 88/12323 [08:38<20:01:37,  5.89s/it, v_num=i2o7, train/loss=9.6"
      ]
     },
     {
@@ -130514,8 +26596,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11558/12318 [20:01:12<1:18:59,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11558/12318 [20:01:12<1:18:59,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 89/12323 [08:40<19:52:03,  5.85s/it, v_num=i2o7, train/loss=9.6\r",
+      "Epoch 0:   1%| | 89/12323 [08:40<19:52:03,  5.85s/it, v_num=i2o7, train/loss=9.9"
      ]
     },
     {
@@ -130523,8 +26605,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11559/12318 [20:01:20<1:18:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11559/12318 [20:01:20<1:18:53,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 90/12323 [08:47<19:55:15,  5.86s/it, v_num=i2o7, train/loss=9.9\r",
+      "Epoch 0:   1%| | 90/12323 [08:47<19:55:15,  5.86s/it, v_num=i2o7, train/loss=9.1"
      ]
     },
     {
@@ -130532,8 +26614,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11560/12318 [20:01:28<1:18:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11560/12318 [20:01:28<1:18:46,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 91/12323 [08:52<19:53:54,  5.86s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 91/12323 [08:52<19:53:55,  5.86s/it, v_num=i2o7, train/loss=9.3"
      ]
     },
     {
@@ -130541,8 +26623,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11561/12318 [20:01:31<1:18:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11561/12318 [20:01:31<1:18:40,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 92/12323 [08:57<19:51:24,  5.84s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 92/12323 [08:57<19:51:24,  5.84s/it, v_num=i2o7, train/loss=9.4"
      ]
     },
     {
@@ -130550,8 +26632,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11562/12318 [20:01:33<1:18:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11562/12318 [20:01:33<1:18:33,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 93/12323 [09:02<19:48:36,  5.83s/it, v_num=i2o7, train/loss=9.4\r",
+      "Epoch 0:   1%| | 93/12323 [09:02<19:48:36,  5.83s/it, v_num=i2o7, train/loss=9.8"
      ]
     },
     {
@@ -130559,8 +26641,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11563/12318 [20:01:36<1:18:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11563/12318 [20:01:36<1:18:27,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 94/12323 [09:07<19:46:05,  5.82s/it, v_num=i2o7, train/loss=9.8\r",
+      "Epoch 0:   1%| | 94/12323 [09:07<19:46:05,  5.82s/it, v_num=i2o7, train/loss=9.3"
      ]
     },
     {
@@ -130568,8 +26650,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11564/12318 [20:01:44<1:18:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11564/12318 [20:01:44<1:18:21,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 95/12323 [09:12<19:44:46,  5.81s/it, v_num=i2o7, train/loss=9.3\r",
+      "Epoch 0:   1%| | 95/12323 [09:12<19:44:46,  5.81s/it, v_num=i2o7, train/loss=9.2"
      ]
     },
     {
@@ -130577,8 +26659,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11565/12318 [20:01:46<1:18:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11565/12318 [20:01:46<1:18:14,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 96/12323 [10:00<21:13:50,  6.25s/it, v_num=i2o7, train/loss=9.2\r",
+      "Epoch 0:   1%| | 96/12323 [10:00<21:13:50,  6.25s/it, v_num=i2o7, train/loss=9.1"
      ]
     },
     {
@@ -130586,8 +26668,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11566/12318 [20:01:53<1:18:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11566/12318 [20:01:53<1:18:08,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 97/12323 [10:08<21:18:08,  6.27s/it, v_num=i2o7, train/loss=9.1\r",
+      "Epoch 0:   1%| | 97/12323 [10:08<21:18:08,  6.27s/it, v_num=i2o7, train/loss=7.7"
      ]
     },
     {
@@ -130595,8 +26677,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11567/12318 [20:02:01<1:18:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11567/12318 [20:02:01<1:18:02,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 98/12323 [10:16<21:22:27,  6.29s/it, v_num=i2o7, train/loss=7.7\r",
+      "Epoch 0:   1%| | 98/12323 [10:16<21:22:27,  6.29s/it, v_num=i2o7, train/loss=7.8"
      ]
     },
     {
@@ -130604,8 +26686,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11568/12318 [20:02:09<1:17:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11568/12318 [20:02:09<1:17:56,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 99/12323 [10:22<21:20:14,  6.28s/it, v_num=i2o7, train/loss=7.8\r",
+      "Epoch 0:   1%| | 99/12323 [10:22<21:20:14,  6.28s/it, v_num=i2o7, train/loss=7.8"
      ]
     },
     {
@@ -130613,8 +26695,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11569/12318 [20:02:13<1:17:50,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11569/12318 [20:02:13<1:17:50,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 100/12323 [10:28<21:20:18,  6.28s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 100/12323 [10:28<21:20:19,  6.28s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130622,8 +26704,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11570/12318 [20:02:20<1:17:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11570/12318 [20:02:20<1:17:43,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 101/12323 [10:36<21:24:32,  6.31s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 101/12323 [10:36<21:24:32,  6.31s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130631,8 +26713,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11571/12318 [20:02:28<1:17:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11571/12318 [20:02:28<1:17:37,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 102/12323 [10:38<21:15:23,  6.26s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 102/12323 [10:38<21:15:23,  6.26s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130640,8 +26722,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11572/12318 [20:02:37<1:17:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11572/12318 [20:02:37<1:17:31,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 103/12323 [10:42<21:10:30,  6.24s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 103/12323 [10:42<21:10:30,  6.24s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130649,8 +26731,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11573/12318 [20:02:40<1:17:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11573/12318 [20:02:40<1:17:25,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 104/12323 [10:51<21:14:50,  6.26s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 104/12323 [10:51<21:14:50,  6.26s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130658,8 +26740,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11574/12318 [20:02:46<1:17:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11574/12318 [20:02:46<1:17:19,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 105/12323 [10:53<21:08:02,  6.23s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 105/12323 [10:53<21:08:02,  6.23s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130667,8 +26749,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11575/12318 [20:02:55<1:17:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11575/12318 [20:02:55<1:17:12,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 106/12323 [10:55<20:59:24,  6.19s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 106/12323 [10:55<20:59:24,  6.19s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130676,8 +26758,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11576/12318 [20:03:02<1:17:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11576/12318 [20:03:02<1:17:06,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 107/12323 [11:03<21:03:19,  6.20s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 107/12323 [11:03<21:03:19,  6.20s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130685,7 +26767,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11577/12318 [20:03:09<1:17:00,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 108/12323 [11:05<20:53:52,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 108/12323 [11:05<20:53:52,  6.16s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
@@ -130693,7 +26776,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11577/12318 [20:03:09<1:17:00,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 109/12323 [11:12<20:55:30,  6.17s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   1%| | 109/12323 [11:12<20:55:30,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130701,8 +26785,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11578/12318 [20:03:18<1:16:54,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11578/12318 [20:03:18<1:16:54,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 110/12323 [11:13<20:47:11,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 110/12323 [11:13<20:47:12,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130710,8 +26794,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11579/12318 [20:03:22<1:16:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11579/12318 [20:03:22<1:16:48,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 111/12323 [11:19<20:45:29,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 111/12323 [11:19<20:45:29,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130719,8 +26803,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11580/12318 [20:03:27<1:16:41,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11580/12318 [20:03:27<1:16:41,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 112/12323 [11:26<20:47:24,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 112/12323 [11:26<20:47:24,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130728,8 +26812,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11581/12318 [20:03:32<1:16:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11581/12318 [20:03:32<1:16:35,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 113/12323 [11:29<20:41:19,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 113/12323 [11:29<20:41:19,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130737,8 +26821,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11582/12318 [20:03:40<1:16:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11582/12318 [20:03:40<1:16:29,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 114/12323 [11:32<20:36:07,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 114/12323 [11:32<20:36:07,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130746,7 +26830,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11583/12318 [20:03:45<1:16:23,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 115/12323 [11:33<20:27:33,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 115/12323 [11:33<20:27:33,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130754,7 +26839,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11583/12318 [20:03:45<1:16:23,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 116/12323 [11:38<20:24:25,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 116/12323 [11:38<20:24:25,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130762,8 +26848,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11584/12318 [20:04:00<1:16:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11584/12318 [20:04:00<1:16:17,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 117/12323 [11:41<20:19:32,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 117/12323 [11:41<20:19:32,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130771,8 +26857,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11585/12318 [20:04:08<1:16:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11585/12318 [20:04:08<1:16:11,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 118/12323 [11:45<20:15:34,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 118/12323 [11:45<20:15:34,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130780,8 +26866,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11586/12318 [20:04:14<1:16:05,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11586/12318 [20:04:14<1:16:05,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 119/12323 [11:48<20:10:49,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 119/12323 [11:48<20:10:49,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130789,8 +26875,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11587/12318 [20:04:22<1:15:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11587/12318 [20:04:22<1:15:58,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 120/12323 [11:55<20:13:03,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 120/12323 [11:55<20:13:04,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130798,8 +26884,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11588/12318 [20:04:29<1:15:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11588/12318 [20:04:29<1:15:52,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 121/12323 [12:03<20:15:15,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 121/12323 [12:03<20:15:15,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130807,8 +26893,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11589/12318 [20:04:30<1:15:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11589/12318 [20:04:30<1:15:46,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 122/12323 [12:09<20:15:47,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 122/12323 [12:09<20:15:47,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130816,8 +26902,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11590/12318 [20:04:36<1:15:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11590/12318 [20:04:36<1:15:39,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 123/12323 [12:11<20:08:48,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 123/12323 [12:11<20:08:48,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130825,8 +26911,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11591/12318 [20:04:41<1:15:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11591/12318 [20:04:41<1:15:33,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 124/12323 [12:18<20:11:07,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 124/12323 [12:18<20:11:07,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130834,8 +26920,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11592/12318 [20:04:46<1:15:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11592/12318 [20:04:46<1:15:27,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 125/12323 [12:22<20:06:47,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 125/12323 [12:22<20:06:47,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130843,8 +26929,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11593/12318 [20:04:52<1:15:20,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11593/12318 [20:04:52<1:15:20,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 126/12323 [12:28<20:07:38,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 126/12323 [12:28<20:07:38,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130852,8 +26938,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11594/12318 [20:05:00<1:15:14,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11594/12318 [20:05:00<1:15:14,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 127/12323 [12:29<20:00:04,  5.90s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 127/12323 [12:29<20:00:04,  5.90s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130861,8 +26947,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11595/12318 [20:05:07<1:15:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11595/12318 [20:05:07<1:15:08,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 128/12323 [13:14<21:01:41,  6.21s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 128/12323 [13:14<21:01:41,  6.21s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130870,8 +26956,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11596/12318 [20:05:11<1:15:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11596/12318 [20:05:11<1:15:02,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 129/12323 [13:20<21:01:51,  6.21s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 129/12323 [13:20<21:01:51,  6.21s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130879,8 +26965,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11597/12318 [20:05:16<1:14:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11597/12318 [20:05:16<1:14:56,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 130/12323 [13:24<20:57:13,  6.19s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 130/12323 [13:24<20:57:13,  6.19s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130888,8 +26974,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11598/12318 [20:05:23<1:14:49,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11598/12318 [20:05:23<1:14:49,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 131/12323 [13:28<20:53:26,  6.17s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 131/12323 [13:28<20:53:26,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130897,8 +26983,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11599/12318 [20:05:28<1:14:43,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11599/12318 [20:05:28<1:14:43,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 132/12323 [13:34<20:53:32,  6.17s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 132/12323 [13:34<20:53:32,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130906,8 +26992,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11600/12318 [20:05:30<1:14:37,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11600/12318 [20:05:30<1:14:37,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 133/12323 [13:41<20:55:28,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 133/12323 [13:41<20:55:28,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130915,8 +27001,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11601/12318 [20:05:33<1:14:30,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11601/12318 [20:05:33<1:14:30,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 134/12323 [13:48<20:55:44,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 134/12323 [13:48<20:55:44,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130924,8 +27010,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11602/12318 [20:05:41<1:14:24,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11602/12318 [20:05:41<1:14:24,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 135/12323 [13:52<20:52:04,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 135/12323 [13:52<20:52:04,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130933,8 +27019,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11603/12318 [20:05:42<1:14:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11603/12318 [20:05:42<1:14:17,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 136/12323 [14:00<20:55:17,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 136/12323 [14:00<20:55:17,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130942,8 +27028,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11604/12318 [20:05:48<1:14:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11604/12318 [20:05:48<1:14:11,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 137/12323 [14:05<20:54:01,  6.17s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 137/12323 [14:05<20:54:01,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130951,8 +27037,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11605/12318 [20:05:56<1:14:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11605/12318 [20:05:56<1:14:05,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 138/12323 [14:09<20:50:30,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 138/12323 [14:09<20:50:30,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130960,8 +27046,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11606/12318 [20:06:01<1:13:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11606/12318 [20:06:01<1:13:59,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 139/12323 [14:13<20:46:59,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 139/12323 [14:13<20:46:59,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130969,8 +27055,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11607/12318 [20:06:09<1:13:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11607/12318 [20:06:09<1:13:53,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 140/12323 [14:14<20:39:49,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 140/12323 [14:14<20:39:49,  6.11s/it, v_num=i2o7, train/loss=6."
      ]
     },
     {
@@ -130978,8 +27064,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11608/12318 [20:06:13<1:13:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11608/12318 [20:06:13<1:13:46,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 141/12323 [14:19<20:37:54,  6.10s/it, v_num=i2o7, train/loss=6.\r",
+      "Epoch 0:   1%| | 141/12323 [14:19<20:37:54,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130987,8 +27073,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11609/12318 [20:06:18<1:13:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11609/12318 [20:06:18<1:13:40,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 142/12323 [14:21<20:31:41,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 142/12323 [14:21<20:31:41,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -130996,8 +27082,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11610/12318 [20:06:26<1:13:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11610/12318 [20:06:26<1:13:34,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 143/12323 [14:24<20:26:59,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 143/12323 [14:24<20:26:59,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131005,8 +27091,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11611/12318 [20:06:31<1:13:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11611/12318 [20:06:31<1:13:27,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 144/12323 [14:31<20:28:49,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 144/12323 [14:31<20:28:49,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131014,8 +27100,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11612/12318 [20:06:38<1:13:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11612/12318 [20:06:38<1:13:21,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 145/12323 [14:36<20:26:18,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 145/12323 [14:36<20:26:18,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131023,8 +27109,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11613/12318 [20:06:45<1:13:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11613/12318 [20:06:45<1:13:15,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 146/12323 [14:38<20:21:41,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 146/12323 [14:38<20:21:41,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131032,8 +27118,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11614/12318 [20:06:53<1:13:09,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11614/12318 [20:06:53<1:13:09,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 147/12323 [14:40<20:15:43,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 147/12323 [14:40<20:15:43,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131041,8 +27127,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11615/12318 [20:06:57<1:13:03,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11615/12318 [20:06:57<1:13:03,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 148/12323 [14:49<20:19:07,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 148/12323 [14:49<20:19:07,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131050,8 +27136,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11616/12318 [20:07:16<1:12:57,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11616/12318 [20:07:16<1:12:57,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 149/12323 [14:54<20:18:12,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 149/12323 [14:54<20:18:12,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131059,8 +27145,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11617/12318 [20:07:20<1:12:51,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11617/12318 [20:07:20<1:12:51,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 150/12323 [14:57<20:13:48,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 150/12323 [14:57<20:13:48,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131068,8 +27154,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11618/12318 [20:07:25<1:12:44,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11618/12318 [20:07:25<1:12:44,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 151/12323 [14:59<20:08:44,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 151/12323 [14:59<20:08:44,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131077,8 +27163,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11619/12318 [20:07:31<1:12:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11619/12318 [20:07:31<1:12:38,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 152/12323 [15:03<20:05:47,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 152/12323 [15:03<20:05:47,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131086,8 +27172,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11620/12318 [20:07:39<1:12:32,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11620/12318 [20:07:39<1:12:32,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 153/12323 [15:10<20:07:42,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 153/12323 [15:10<20:07:42,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131095,8 +27181,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11621/12318 [20:07:43<1:12:26,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11621/12318 [20:07:43<1:12:26,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 154/12323 [15:13<20:03:28,  5.93s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 154/12323 [15:13<20:03:28,  5.93s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131104,8 +27190,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11622/12318 [20:07:47<1:12:19,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11622/12318 [20:07:47<1:12:19,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 155/12323 [15:20<20:03:58,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 155/12323 [15:20<20:03:58,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131113,8 +27199,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11623/12318 [20:07:56<1:12:13,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11623/12318 [20:07:56<1:12:13,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 156/12323 [15:23<20:00:28,  5.92s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 156/12323 [15:23<20:00:28,  5.92s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131122,8 +27208,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11624/12318 [20:07:58<1:12:07,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11624/12318 [20:07:58<1:12:07,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 157/12323 [15:30<20:02:20,  5.93s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 157/12323 [15:30<20:02:20,  5.93s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131131,8 +27217,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11625/12318 [20:08:06<1:12:01,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11625/12318 [20:08:06<1:12:01,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 158/12323 [15:38<20:04:05,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 158/12323 [15:38<20:04:05,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131140,7 +27226,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11626/12318 [20:08:10<1:11:54,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 159/12323 [15:46<20:07:06,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 159/12323 [15:46<20:07:07,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131148,7 +27235,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11626/12318 [20:08:10<1:11:54,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 160/12323 [16:29<20:53:35,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 160/12323 [16:29<20:53:35,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131156,8 +27244,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11627/12318 [20:08:19<1:11:48,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11627/12318 [20:08:19<1:11:48,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 161/12323 [16:34<20:52:27,  6.18s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 161/12323 [16:34<20:52:27,  6.18s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131165,8 +27253,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11628/12318 [20:08:21<1:11:42,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11628/12318 [20:08:21<1:11:42,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 162/12323 [16:39<20:49:59,  6.17s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 162/12323 [16:39<20:49:59,  6.17s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131174,8 +27262,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11629/12318 [20:08:27<1:11:35,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11629/12318 [20:08:27<1:11:35,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 163/12323 [16:42<20:46:16,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 163/12323 [16:42<20:46:16,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131183,8 +27271,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11630/12318 [20:08:33<1:11:29,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11630/12318 [20:08:33<1:11:29,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 164/12323 [16:45<20:41:56,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 164/12323 [16:45<20:41:56,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131192,8 +27280,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11631/12318 [20:08:41<1:11:23,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11631/12318 [20:08:41<1:11:23,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 165/12323 [16:48<20:38:20,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 165/12323 [16:48<20:38:20,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131201,8 +27289,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11632/12318 [20:08:50<1:11:17,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11632/12318 [20:08:50<1:11:17,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 166/12323 [16:51<20:34:45,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 166/12323 [16:51<20:34:45,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131210,8 +27298,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11633/12318 [20:08:55<1:11:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11633/12318 [20:08:55<1:11:11,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 167/12323 [16:56<20:33:39,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 167/12323 [16:56<20:33:39,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131219,8 +27307,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11634/12318 [20:09:02<1:11:04,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11634/12318 [20:09:02<1:11:04,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 168/12323 [16:58<20:27:44,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 168/12323 [16:58<20:27:44,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131228,8 +27316,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11635/12318 [20:09:11<1:10:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11635/12318 [20:09:11<1:10:58,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 169/12323 [16:59<20:21:53,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 169/12323 [16:59<20:21:53,  6.03s/it, v_num=i2o7, train/loss=6."
      ]
     },
     {
@@ -131237,8 +27325,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11636/12318 [20:09:19<1:10:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11636/12318 [20:09:19<1:10:52,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 170/12323 [17:02<20:17:54,  6.01s/it, v_num=i2o7, train/loss=6.\r",
+      "Epoch 0:   1%| | 170/12323 [17:02<20:17:54,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131246,8 +27334,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11637/12318 [20:09:26<1:10:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11637/12318 [20:09:26<1:10:46,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 171/12323 [17:06<20:15:47,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 171/12323 [17:06<20:15:47,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131255,8 +27343,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11638/12318 [20:09:27<1:10:40,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11638/12318 [20:09:27<1:10:40,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 172/12323 [17:10<20:13:44,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 172/12323 [17:10<20:13:44,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131264,8 +27352,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11639/12318 [20:09:33<1:10:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11639/12318 [20:09:33<1:10:33,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 173/12323 [17:13<20:09:54,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 173/12323 [17:13<20:09:54,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131273,8 +27361,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  94%|▉| 11640/12318 [20:09:42<1:10:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  94%|▉| 11640/12318 [20:09:42<1:10:27,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 174/12323 [17:21<20:11:27,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 174/12323 [17:21<20:11:27,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131282,8 +27370,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11641/12318 [20:09:50<1:10:21,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11641/12318 [20:09:50<1:10:21,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 175/12323 [17:29<20:14:16,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 175/12323 [17:29<20:14:16,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131291,8 +27379,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11642/12318 [20:09:52<1:10:15,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11642/12318 [20:09:52<1:10:15,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 176/12323 [17:36<20:14:42,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 176/12323 [17:36<20:14:42,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131300,8 +27388,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11643/12318 [20:09:59<1:10:08,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11643/12318 [20:09:59<1:10:08,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 177/12323 [17:40<20:12:42,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 177/12323 [17:40<20:12:42,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131309,8 +27397,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11644/12318 [20:10:07<1:10:02,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11644/12318 [20:10:07<1:10:02,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 178/12323 [17:43<20:09:02,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 178/12323 [17:43<20:09:02,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131318,8 +27406,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11645/12318 [20:10:08<1:09:56,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11645/12318 [20:10:08<1:09:56,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 179/12323 [17:49<20:09:25,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 179/12323 [17:49<20:09:25,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131327,8 +27415,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11646/12318 [20:10:12<1:09:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11646/12318 [20:10:12<1:09:49,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 180/12323 [17:52<20:05:45,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 180/12323 [17:52<20:05:45,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131336,8 +27424,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11647/12318 [20:10:18<1:09:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11647/12318 [20:10:18<1:09:43,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 181/12323 [17:58<20:06:08,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 181/12323 [17:58<20:06:08,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131345,8 +27433,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11648/12318 [20:10:35<1:09:38,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11648/12318 [20:10:35<1:09:38,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 182/12323 [18:06<20:07:38,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 182/12323 [18:06<20:07:38,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131354,8 +27442,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11649/12318 [20:10:41<1:09:31,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11649/12318 [20:10:41<1:09:31,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 183/12323 [18:09<20:04:35,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 183/12323 [18:09<20:04:35,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131363,8 +27451,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11650/12318 [20:10:43<1:09:25,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11650/12318 [20:10:43<1:09:25,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   1%| | 184/12323 [18:12<20:01:34,  5.94s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   1%| | 184/12323 [18:12<20:01:34,  5.94s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131372,8 +27460,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11651/12318 [20:10:48<1:09:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11651/12318 [20:10:48<1:09:18,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 185/12323 [18:21<20:04:11,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 185/12323 [18:21<20:04:11,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131381,8 +27469,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11652/12318 [20:10:53<1:09:12,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11652/12318 [20:10:53<1:09:12,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 186/12323 [18:28<20:05:39,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 186/12323 [18:28<20:05:39,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131390,8 +27478,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11653/12318 [20:10:57<1:09:06,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11653/12318 [20:10:57<1:09:06,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 187/12323 [18:36<20:07:07,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 187/12323 [18:36<20:07:07,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131399,8 +27487,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11654/12318 [20:11:02<1:09:00,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11654/12318 [20:11:02<1:09:00,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 188/12323 [18:42<20:07:30,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 188/12323 [18:42<20:07:30,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131408,8 +27496,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11655/12318 [20:11:11<1:08:53,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11655/12318 [20:11:11<1:08:53,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 189/12323 [18:50<20:10:04,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 189/12323 [18:50<20:10:04,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131417,8 +27505,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11656/12318 [20:11:15<1:08:47,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11656/12318 [20:11:15<1:08:47,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 190/12323 [18:56<20:09:18,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 190/12323 [18:56<20:09:18,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131426,8 +27514,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11657/12318 [20:11:19<1:08:41,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11657/12318 [20:11:19<1:08:41,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 191/12323 [19:03<20:10:47,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 191/12323 [19:03<20:10:47,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131435,8 +27523,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11658/12318 [20:11:23<1:08:34,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11658/12318 [20:11:23<1:08:34,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 192/12323 [19:41<20:44:26,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 192/12323 [19:41<20:44:26,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131444,8 +27532,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11659/12318 [20:11:32<1:08:28,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11659/12318 [20:11:32<1:08:28,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 193/12323 [19:47<20:43:32,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 193/12323 [19:47<20:43:32,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131453,8 +27541,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11660/12318 [20:11:40<1:08:22,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11660/12318 [20:11:40<1:08:22,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 194/12323 [19:49<20:39:54,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 194/12323 [19:49<20:39:54,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131462,8 +27550,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11661/12318 [20:11:43<1:08:16,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11661/12318 [20:11:43<1:08:16,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 195/12323 [19:51<20:35:21,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 195/12323 [19:51<20:35:21,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131471,8 +27559,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11662/12318 [20:11:46<1:08:09,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11662/12318 [20:11:46<1:08:09,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 196/12323 [19:56<20:33:24,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 196/12323 [19:56<20:33:24,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131480,7 +27568,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11663/12318 [20:11:54<1:08:03,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 197/12323 [20:02<20:33:36,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 197/12323 [20:02<20:33:36,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131488,7 +27577,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11663/12318 [20:11:54<1:08:03,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 198/12323 [20:08<20:33:45,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 198/12323 [20:08<20:33:45,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131496,8 +27586,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11664/12318 [20:11:59<1:07:57,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11664/12318 [20:11:59<1:07:57,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 199/12323 [20:12<20:30:49,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 199/12323 [20:12<20:30:49,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131505,8 +27595,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11665/12318 [20:12:06<1:07:51,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11665/12318 [20:12:06<1:07:51,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 200/12323 [20:15<20:27:55,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 200/12323 [20:15<20:27:55,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131514,8 +27604,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11666/12318 [20:12:12<1:07:44,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11666/12318 [20:12:12<1:07:44,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 201/12323 [20:20<20:27:03,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 201/12323 [20:20<20:27:03,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131523,8 +27613,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11667/12318 [20:12:19<1:07:38,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11667/12318 [20:12:19<1:07:38,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 202/12323 [20:24<20:24:09,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 202/12323 [20:24<20:24:10,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131532,8 +27622,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11668/12318 [20:12:27<1:07:32,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11668/12318 [20:12:27<1:07:32,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 203/12323 [20:28<20:22:49,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 203/12323 [20:28<20:22:49,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131541,8 +27631,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11669/12318 [20:12:28<1:07:26,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11669/12318 [20:12:28<1:07:26,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 204/12323 [20:35<20:23:05,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 204/12323 [20:35<20:23:05,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131550,8 +27640,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11670/12318 [20:12:33<1:07:19,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11670/12318 [20:12:33<1:07:19,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 205/12323 [20:39<20:20:46,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 205/12323 [20:39<20:20:46,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131559,8 +27649,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11671/12318 [20:12:41<1:07:13,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11671/12318 [20:12:41<1:07:13,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 206/12323 [20:42<20:18:30,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 206/12323 [20:42<20:18:30,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131568,8 +27658,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11672/12318 [20:12:46<1:07:07,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11672/12318 [20:12:46<1:07:07,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 207/12323 [20:45<20:15:13,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 207/12323 [20:45<20:15:13,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131577,8 +27667,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11673/12318 [20:12:55<1:07:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11673/12318 [20:12:55<1:07:01,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 208/12323 [20:51<20:14:28,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 208/12323 [20:51<20:14:28,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131586,8 +27676,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11674/12318 [20:13:03<1:06:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11674/12318 [20:13:03<1:06:55,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 209/12323 [20:59<20:16:41,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 209/12323 [20:59<20:16:41,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131595,8 +27685,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11675/12318 [20:13:09<1:06:48,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11675/12318 [20:13:09<1:06:48,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 210/12323 [21:03<20:14:55,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 210/12323 [21:03<20:14:55,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131604,8 +27694,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11676/12318 [20:13:12<1:06:42,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11676/12318 [20:13:12<1:06:42,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 211/12323 [21:12<20:17:02,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131613,8 +27702,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11677/12318 [20:13:20<1:06:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11677/12318 [20:13:20<1:06:36,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 211/12323 [21:12<20:17:02,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131622,8 +27710,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11678/12318 [20:13:28<1:06:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11678/12318 [20:13:28<1:06:30,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 212/12323 [21:18<20:17:09,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 212/12323 [21:18<20:17:09,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131631,8 +27719,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11679/12318 [20:13:31<1:06:23,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11679/12318 [20:13:31<1:06:23,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 213/12323 [21:23<20:16:17,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 213/12323 [21:23<20:16:17,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131640,8 +27728,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11680/12318 [20:13:48<1:06:18,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11680/12318 [20:13:48<1:06:18,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 214/12323 [21:24<20:11:40,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 214/12323 [21:24<20:11:40,  6.00s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
@@ -131649,8 +27737,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11681/12318 [20:13:52<1:06:11,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11681/12318 [20:13:52<1:06:11,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 215/12323 [21:28<20:09:27,  5.99s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   2%| | 215/12323 [21:28<20:09:27,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131658,8 +27746,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11682/12318 [20:13:53<1:06:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11682/12318 [20:13:53<1:06:05,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 216/12323 [21:34<20:09:35,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 216/12323 [21:34<20:09:35,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131667,8 +27755,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11683/12318 [20:13:58<1:05:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11683/12318 [20:13:58<1:05:58,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 217/12323 [21:39<20:08:19,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 217/12323 [21:39<20:08:19,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131676,8 +27764,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11684/12318 [20:14:05<1:05:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11684/12318 [20:14:05<1:05:52,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 218/12323 [21:43<20:06:36,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 218/12323 [21:43<20:06:36,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131685,8 +27773,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11685/12318 [20:14:09<1:05:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11685/12318 [20:14:09<1:05:46,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 219/12323 [21:47<20:04:28,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 219/12323 [21:47<20:04:28,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131694,8 +27782,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11686/12318 [20:14:14<1:05:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11686/12318 [20:14:14<1:05:40,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 220/12323 [21:51<20:02:18,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 220/12323 [21:51<20:02:18,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131703,8 +27791,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11687/12318 [20:14:18<1:05:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11687/12318 [20:14:18<1:05:33,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 221/12323 [21:57<20:02:29,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 221/12323 [21:57<20:02:29,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131712,8 +27800,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11688/12318 [20:14:21<1:05:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11688/12318 [20:14:21<1:05:27,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 222/12323 [22:03<20:02:39,  5.96s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 222/12323 [22:03<20:02:39,  5.96s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131721,8 +27809,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11689/12318 [20:14:29<1:05:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11689/12318 [20:14:29<1:05:21,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 223/12323 [22:06<19:59:37,  5.95s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 223/12323 [22:06<19:59:37,  5.95s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131730,8 +27818,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11690/12318 [20:14:34<1:05:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11690/12318 [20:14:34<1:05:14,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 224/12323 [22:45<20:29:31,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 224/12323 [22:45<20:29:31,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131739,8 +27827,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11691/12318 [20:14:37<1:05:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11691/12318 [20:14:37<1:05:08,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 225/12323 [22:49<20:26:52,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 225/12323 [22:49<20:26:52,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131748,8 +27836,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11692/12318 [20:14:39<1:05:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11692/12318 [20:14:39<1:05:02,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 226/12323 [22:53<20:25:37,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 226/12323 [22:53<20:25:37,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131757,8 +27845,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11693/12318 [20:14:43<1:04:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11693/12318 [20:14:43<1:04:55,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 227/12323 [22:59<20:24:48,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 227/12323 [22:59<20:24:48,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131766,8 +27854,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11694/12318 [20:14:50<1:04:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11694/12318 [20:14:50<1:04:49,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 228/12323 [23:00<20:20:56,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 228/12323 [23:00<20:20:56,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131775,7 +27863,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11695/12318 [20:14:53<1:04:43,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 229/12323 [23:07<20:21:07,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 229/12323 [23:07<20:21:07,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131783,7 +27872,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11695/12318 [20:14:53<1:04:43,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 230/12323 [23:14<20:22:09,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 230/12323 [23:14<20:22:09,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131791,8 +27881,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11696/12318 [20:15:01<1:04:36,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11696/12318 [20:15:01<1:04:36,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 231/12323 [23:17<20:19:10,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 231/12323 [23:17<20:19:10,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131800,8 +27890,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11697/12318 [20:15:08<1:04:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11697/12318 [20:15:08<1:04:30,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 232/12323 [23:25<20:20:59,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 232/12323 [23:25<20:20:59,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131809,8 +27899,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11698/12318 [20:15:09<1:04:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11698/12318 [20:15:09<1:04:24,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 233/12323 [23:34<20:22:51,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 233/12323 [23:34<20:22:51,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131818,8 +27908,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11699/12318 [20:15:12<1:04:17,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11699/12318 [20:15:12<1:04:17,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 234/12323 [23:35<20:18:36,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 234/12323 [23:35<20:18:36,  6.05s/it, v_num=i2o7, train/loss=6."
      ]
     },
     {
@@ -131827,8 +27917,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11700/12318 [20:15:17<1:04:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11700/12318 [20:15:17<1:04:11,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 235/12323 [23:42<20:19:33,  6.05s/it, v_num=i2o7, train/loss=6.\r",
+      "Epoch 0:   2%| | 235/12323 [23:42<20:19:33,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131836,8 +27926,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11701/12318 [20:15:18<1:04:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11701/12318 [20:15:18<1:04:05,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 236/12323 [23:44<20:16:12,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 236/12323 [23:44<20:16:12,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131845,8 +27935,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11702/12318 [20:15:22<1:03:58,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11702/12318 [20:15:22<1:03:58,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 237/12323 [23:51<20:16:20,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 237/12323 [23:51<20:16:20,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131854,8 +27944,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11703/12318 [20:15:30<1:03:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11703/12318 [20:15:30<1:03:52,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 238/12323 [23:55<20:15:12,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 238/12323 [23:55<20:15:12,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131863,8 +27953,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11704/12318 [20:15:38<1:03:46,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11704/12318 [20:15:38<1:03:46,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 239/12323 [24:00<20:13:35,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 239/12323 [24:00<20:13:35,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131872,8 +27962,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11705/12318 [20:15:46<1:03:40,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11705/12318 [20:15:46<1:03:40,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 240/12323 [24:05<20:12:53,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 240/12323 [24:05<20:12:54,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131881,8 +27971,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11706/12318 [20:15:48<1:03:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11706/12318 [20:15:48<1:03:33,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 241/12323 [24:07<20:09:14,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 241/12323 [24:07<20:09:14,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131890,8 +27980,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11707/12318 [20:15:53<1:03:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11707/12318 [20:15:53<1:03:27,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 242/12323 [24:15<20:11:06,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 242/12323 [24:15<20:11:06,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131899,8 +27989,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11708/12318 [20:16:02<1:03:21,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11708/12318 [20:16:02<1:03:21,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 243/12323 [24:20<20:09:40,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 243/12323 [24:20<20:09:40,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131908,8 +27998,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11709/12318 [20:16:09<1:03:15,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11709/12318 [20:16:09<1:03:15,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 244/12323 [24:28<20:11:33,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 244/12323 [24:28<20:11:33,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131917,8 +28007,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11710/12318 [20:16:14<1:03:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11710/12318 [20:16:14<1:03:08,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 245/12323 [24:32<20:09:40,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 245/12323 [24:32<20:09:40,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131926,8 +28016,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11711/12318 [20:16:19<1:03:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11711/12318 [20:16:19<1:03:02,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 246/12323 [24:38<20:09:51,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 246/12323 [24:38<20:09:51,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131935,8 +28025,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11712/12318 [20:17:05<1:02:58,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11712/12318 [20:17:05<1:02:58,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 247/12323 [24:41<20:07:33,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 247/12323 [24:41<20:07:33,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131944,8 +28034,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11713/12318 [20:17:11<1:02:52,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11713/12318 [20:17:11<1:02:52,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 248/12323 [24:48<20:07:48,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 248/12323 [24:48<20:07:48,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131953,8 +28043,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11714/12318 [20:17:20<1:02:46,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11714/12318 [20:17:20<1:02:46,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 249/12323 [24:54<20:08:05,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 249/12323 [24:54<20:08:05,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131962,8 +28052,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11715/12318 [20:17:27<1:02:39,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11715/12318 [20:17:27<1:02:39,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 250/12323 [25:00<20:07:33,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 250/12323 [25:00<20:07:33,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131971,8 +28061,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11716/12318 [20:17:35<1:02:33,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11716/12318 [20:17:35<1:02:33,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 251/12323 [25:06<20:07:46,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 251/12323 [25:06<20:07:46,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131980,8 +28070,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11717/12318 [20:17:37<1:02:27,  6.24s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11717/12318 [20:17:37<1:02:27,  6.24s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 252/12323 [25:08<20:03:55,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 252/12323 [25:08<20:03:55,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131989,8 +28079,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11718/12318 [20:17:40<1:02:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11718/12318 [20:17:40<1:02:20,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 253/12323 [25:15<20:04:56,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 253/12323 [25:15<20:04:56,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -131998,8 +28088,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11719/12318 [20:17:43<1:02:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11719/12318 [20:17:43<1:02:14,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 254/12323 [25:19<20:03:29,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 254/12323 [25:19<20:03:29,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132007,8 +28097,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11720/12318 [20:17:47<1:02:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11720/12318 [20:17:47<1:02:08,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 255/12323 [25:24<20:02:25,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 255/12323 [25:24<20:02:25,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132016,8 +28106,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11721/12318 [20:17:54<1:02:01,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11721/12318 [20:17:54<1:02:01,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 256/12323 [25:58<20:24:35,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 256/12323 [25:58<20:24:35,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132025,8 +28115,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11722/12318 [20:18:01<1:01:55,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11722/12318 [20:18:01<1:01:55,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 257/12323 [26:01<20:21:30,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 257/12323 [26:01<20:21:30,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132034,8 +28124,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11723/12318 [20:18:09<1:01:49,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11723/12318 [20:18:09<1:01:49,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 258/12323 [26:07<20:21:37,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 258/12323 [26:07<20:21:37,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132043,8 +28133,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11724/12318 [20:18:13<1:01:43,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11724/12318 [20:18:13<1:01:43,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 259/12323 [26:10<20:19:21,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 259/12323 [26:10<20:19:21,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132052,8 +28142,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11725/12318 [20:18:20<1:01:37,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11725/12318 [20:18:20<1:01:37,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 260/12323 [26:18<20:20:15,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 260/12323 [26:18<20:20:15,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132061,8 +28151,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11726/12318 [20:18:22<1:01:30,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11726/12318 [20:18:22<1:01:30,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 261/12323 [26:20<20:17:36,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 261/12323 [26:20<20:17:36,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132070,8 +28160,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11727/12318 [20:18:28<1:01:24,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11727/12318 [20:18:28<1:01:24,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 262/12323 [26:27<20:17:42,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 262/12323 [26:27<20:17:42,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132079,8 +28169,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11728/12318 [20:18:32<1:01:18,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11728/12318 [20:18:32<1:01:18,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 263/12323 [26:31<20:16:34,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 263/12323 [26:31<20:16:34,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132088,8 +28178,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11729/12318 [20:18:37<1:01:11,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11729/12318 [20:18:37<1:01:11,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 264/12323 [26:35<20:14:19,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 264/12323 [26:35<20:14:19,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132097,8 +28187,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11730/12318 [20:18:43<1:01:05,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11730/12318 [20:18:43<1:01:05,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 265/12323 [26:39<20:13:15,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 265/12323 [26:39<20:13:15,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132106,8 +28196,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11731/12318 [20:18:48<1:00:59,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11731/12318 [20:18:48<1:00:59,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 266/12323 [26:44<20:11:48,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 266/12323 [26:44<20:11:48,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132115,8 +28205,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11732/12318 [20:18:50<1:00:52,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11732/12318 [20:18:50<1:00:52,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 267/12323 [26:48<20:10:23,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 267/12323 [26:48<20:10:23,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132124,7 +28214,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11733/12318 [20:18:53<1:00:46,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 268/12323 [26:57<20:12:30,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 268/12323 [26:57<20:12:30,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132132,7 +28223,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11733/12318 [20:18:53<1:00:46,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 269/12323 [27:00<20:10:21,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 269/12323 [27:00<20:10:21,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132140,8 +28232,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11734/12318 [20:18:55<1:00:39,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11734/12318 [20:18:55<1:00:39,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 270/12323 [27:07<20:11:12,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 270/12323 [27:07<20:11:12,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132149,8 +28241,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11735/12318 [20:19:00<1:00:33,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11735/12318 [20:19:00<1:00:33,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 271/12323 [27:13<20:10:39,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 271/12323 [27:13<20:10:39,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132158,8 +28250,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11736/12318 [20:19:03<1:00:27,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11736/12318 [20:19:03<1:00:27,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 272/12323 [27:18<20:09:41,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 272/12323 [27:18<20:09:41,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132167,8 +28259,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11737/12318 [20:19:08<1:00:20,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11737/12318 [20:19:08<1:00:20,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 273/12323 [27:24<20:09:52,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 273/12323 [27:24<20:09:52,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132176,8 +28268,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11738/12318 [20:19:12<1:00:14,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11738/12318 [20:19:12<1:00:14,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 274/12323 [27:27<20:07:45,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 274/12323 [27:27<20:07:45,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132185,8 +28277,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11739/12318 [20:19:16<1:00:08,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11739/12318 [20:19:16<1:00:08,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 275/12323 [27:31<20:05:41,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 275/12323 [27:31<20:05:41,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132194,8 +28286,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11740/12318 [20:19:24<1:00:02,  6.23s/it, v_num=e4xv, train/los\r",
-      "Epoch 0:  95%|▉| 11740/12318 [20:19:24<1:00:02,  6.23s/it, v_num=e4xv, train/los"
+      "Epoch 0:   2%| | 276/12323 [27:39<20:07:26,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 276/12323 [27:39<20:07:26,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132203,8 +28295,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11741/12318 [20:19:31<59:55,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11741/12318 [20:19:31<59:55,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 277/12323 [27:42<20:04:41,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 277/12323 [27:42<20:04:41,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132212,7 +28304,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11742/12318 [20:19:40<59:49,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 278/12323 [27:47<20:03:47,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 278/12323 [27:47<20:03:47,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132220,7 +28313,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11742/12318 [20:19:40<59:49,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 279/12323 [27:52<20:03:13,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 279/12323 [27:52<20:03:13,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132228,8 +28322,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11743/12318 [20:19:44<59:43,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11743/12318 [20:19:44<59:43,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 280/12323 [28:00<20:04:52,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 280/12323 [28:00<20:04:52,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132237,8 +28331,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11744/12318 [20:20:11<59:38,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11744/12318 [20:20:11<59:38,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 281/12323 [28:04<20:02:50,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 281/12323 [28:04<20:02:50,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132246,8 +28340,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11745/12318 [20:20:16<59:32,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11745/12318 [20:20:16<59:32,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 282/12323 [28:10<20:03:04,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 282/12323 [28:10<20:03:04,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132255,8 +28349,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11746/12318 [20:20:18<59:25,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11746/12318 [20:20:18<59:25,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 283/12323 [28:15<20:02:11,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 283/12323 [28:15<20:02:11,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132264,8 +28358,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11747/12318 [20:20:22<59:19,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11747/12318 [20:20:22<59:19,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 284/12323 [28:21<20:02:25,  5.99s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 284/12323 [28:21<20:02:25,  5.99s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132273,8 +28367,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11748/12318 [20:20:30<59:13,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11748/12318 [20:20:30<59:13,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 285/12323 [28:23<19:59:20,  5.98s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 285/12323 [28:23<19:59:20,  5.98s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132282,8 +28376,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11749/12318 [20:20:34<59:06,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11749/12318 [20:20:34<59:06,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 286/12323 [28:26<19:57:00,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 286/12323 [28:26<19:57:00,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132291,8 +28385,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11750/12318 [20:20:38<59:00,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11750/12318 [20:20:38<59:00,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 287/12323 [28:32<19:57:13,  5.97s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 287/12323 [28:32<19:57:13,  5.97s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132300,7 +28394,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11751/12318 [20:20:43<58:54,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 288/12323 [29:19<20:25:29,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 288/12323 [29:19<20:25:29,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132308,7 +28403,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11751/12318 [20:20:43<58:54,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 289/12323 [29:22<20:23:04,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 289/12323 [29:22<20:23:04,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132316,8 +28412,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11752/12318 [20:20:48<58:47,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11752/12318 [20:20:48<58:47,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 290/12323 [29:27<20:22:23,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 290/12323 [29:27<20:22:23,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132325,8 +28421,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11753/12318 [20:20:57<58:41,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11753/12318 [20:20:57<58:41,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 291/12323 [29:31<20:20:38,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 291/12323 [29:31<20:20:38,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132334,8 +28430,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11754/12318 [20:21:03<58:35,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11754/12318 [20:21:03<58:35,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 292/12323 [29:34<20:18:37,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 292/12323 [29:34<20:18:37,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132343,8 +28439,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11755/12318 [20:21:11<58:29,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11755/12318 [20:21:11<58:29,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 293/12323 [29:39<20:17:37,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 293/12323 [29:39<20:17:37,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132352,8 +28448,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11756/12318 [20:21:19<58:23,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11756/12318 [20:21:19<58:23,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 294/12323 [29:44<20:16:36,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 294/12323 [29:44<20:16:36,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132361,8 +28457,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11757/12318 [20:21:26<58:16,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11757/12318 [20:21:26<58:16,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 295/12323 [29:47<20:14:56,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 295/12323 [29:47<20:14:56,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132370,8 +28466,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11758/12318 [20:21:32<58:10,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11758/12318 [20:21:32<58:10,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 296/12323 [29:50<20:12:36,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 296/12323 [29:50<20:12:36,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132379,8 +28475,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11759/12318 [20:21:34<58:04,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11759/12318 [20:21:34<58:04,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 297/12323 [29:55<20:12:00,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 297/12323 [29:55<20:12:00,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132388,8 +28484,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11760/12318 [20:21:35<57:57,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11760/12318 [20:21:35<57:57,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 298/12323 [29:59<20:10:27,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 298/12323 [29:59<20:10:27,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132397,8 +28493,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11761/12318 [20:21:43<57:51,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11761/12318 [20:21:43<57:51,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 299/12323 [30:04<20:09:31,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 299/12323 [30:04<20:09:31,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132406,8 +28502,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11762/12318 [20:21:48<57:45,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11762/12318 [20:21:48<57:45,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 300/12323 [30:09<20:08:56,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 300/12323 [30:09<20:08:56,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132415,8 +28511,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  95%|▉| 11763/12318 [20:21:53<57:39,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  95%|▉| 11763/12318 [20:21:53<57:39,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 301/12323 [30:13<20:07:20,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 301/12323 [30:13<20:07:20,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132424,8 +28520,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11764/12318 [20:21:56<57:32,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11764/12318 [20:21:56<57:32,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 302/12323 [30:21<20:08:04,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 302/12323 [30:21<20:08:04,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132433,7 +28529,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11765/12318 [20:22:03<57:26,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 303/12323 [30:24<20:06:08,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 303/12323 [30:24<20:06:08,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132441,7 +28538,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11765/12318 [20:22:03<57:26,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 304/12323 [30:32<20:07:28,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 304/12323 [30:32<20:07:28,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132449,8 +28547,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11766/12318 [20:22:07<57:20,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11766/12318 [20:22:07<57:20,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 305/12323 [30:36<20:06:12,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 305/12323 [30:36<20:06:12,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132458,8 +28556,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11767/12318 [20:22:16<57:14,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11767/12318 [20:22:16<57:14,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 306/12323 [30:44<20:07:35,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 306/12323 [30:44<20:07:35,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132467,8 +28565,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11768/12318 [20:22:19<57:07,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11768/12318 [20:22:19<57:07,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 307/12323 [30:51<20:07:40,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 307/12323 [30:51<20:07:40,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132476,8 +28574,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11769/12318 [20:22:26<57:01,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11769/12318 [20:22:26<57:01,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   2%| | 308/12323 [30:58<20:08:19,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   2%| | 308/12323 [30:58<20:08:19,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132485,8 +28583,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11770/12318 [20:22:35<56:55,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11770/12318 [20:22:35<56:55,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 309/12323 [31:01<20:06:24,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 309/12323 [31:01<20:06:24,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132494,8 +28592,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11771/12318 [20:22:41<56:49,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11771/12318 [20:22:41<56:49,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 310/12323 [31:07<20:06:27,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 310/12323 [31:07<20:06:27,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132503,8 +28601,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11772/12318 [20:22:49<56:42,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11772/12318 [20:22:49<56:42,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 311/12323 [31:14<20:06:31,  6.03s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 311/12323 [31:14<20:06:31,  6.03s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132512,8 +28610,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11773/12318 [20:22:50<56:36,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11773/12318 [20:22:50<56:36,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 312/12323 [31:16<20:03:59,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 312/12323 [31:16<20:03:59,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132521,8 +28619,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11774/12318 [20:22:53<56:30,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11774/12318 [20:22:53<56:30,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 313/12323 [31:24<20:05:18,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 313/12323 [31:24<20:05:18,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132530,8 +28628,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11775/12318 [20:22:58<56:23,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11775/12318 [20:22:58<56:23,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 314/12323 [31:30<20:04:43,  6.02s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 314/12323 [31:30<20:04:43,  6.02s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132539,8 +28637,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11776/12318 [20:23:40<56:19,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11776/12318 [20:23:40<56:19,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 315/12323 [31:34<20:03:30,  6.01s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 315/12323 [31:34<20:03:30,  6.01s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132548,8 +28646,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11777/12318 [20:23:45<56:12,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11777/12318 [20:23:45<56:12,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 316/12323 [31:35<20:00:41,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 316/12323 [31:35<20:00:41,  6.00s/it, v_num=i2o7, train/loss=4."
      ]
     },
     {
@@ -132557,8 +28655,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11778/12318 [20:23:54<56:06,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11778/12318 [20:23:54<56:06,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 317/12323 [31:43<20:01:22,  6.00s/it, v_num=i2o7, train/loss=4.\r",
+      "Epoch 0:   3%| | 317/12323 [31:43<20:01:22,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132566,8 +28664,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11779/12318 [20:23:58<56:00,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11779/12318 [20:23:58<56:00,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 318/12323 [31:46<19:59:32,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 318/12323 [31:46<19:59:32,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132575,8 +28673,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11780/12318 [20:24:05<55:54,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11780/12318 [20:24:05<55:54,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 319/12323 [31:52<19:59:38,  6.00s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 319/12323 [31:52<19:59:38,  6.00s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132584,8 +28682,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11781/12318 [20:24:11<55:48,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11781/12318 [20:24:11<55:48,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 320/12323 [32:44<20:27:51,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 320/12323 [32:44<20:27:51,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132593,8 +28691,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11782/12318 [20:24:13<55:41,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11782/12318 [20:24:13<55:41,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 321/12323 [32:47<20:26:20,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 321/12323 [32:47<20:26:20,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132602,7 +28700,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11783/12318 [20:24:19<55:35,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 322/12323 [32:54<20:26:25,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 322/12323 [32:54<20:26:25,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132610,7 +28709,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11783/12318 [20:24:19<55:35,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 323/12323 [32:57<20:24:34,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 323/12323 [32:57<20:24:34,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132618,8 +28718,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11784/12318 [20:24:24<55:29,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11784/12318 [20:24:24<55:29,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 324/12323 [32:59<20:22:05,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 324/12323 [32:59<20:22:05,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132627,8 +28727,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11785/12318 [20:24:25<55:22,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11785/12318 [20:24:25<55:22,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 325/12323 [33:01<20:19:01,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 325/12323 [33:01<20:19:01,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132636,8 +28736,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11786/12318 [20:24:26<55:16,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11786/12318 [20:24:26<55:16,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 326/12323 [33:09<20:20:13,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 326/12323 [33:09<20:20:13,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132645,8 +28745,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11787/12318 [20:24:30<55:09,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11787/12318 [20:24:30<55:09,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 327/12323 [33:14<20:19:36,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 327/12323 [33:14<20:19:36,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132654,8 +28754,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11788/12318 [20:24:34<55:03,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11788/12318 [20:24:34<55:03,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 328/12323 [33:22<20:20:16,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 328/12323 [33:22<20:20:16,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132663,8 +28763,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11789/12318 [20:24:37<54:57,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11789/12318 [20:24:37<54:57,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 329/12323 [33:29<20:20:52,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 329/12323 [33:29<20:20:52,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132672,8 +28772,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11790/12318 [20:24:38<54:50,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11790/12318 [20:24:38<54:50,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 330/12323 [33:35<20:20:50,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 330/12323 [33:35<20:20:50,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132681,8 +28781,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11791/12318 [20:24:43<54:44,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11791/12318 [20:24:43<54:44,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 331/12323 [33:42<20:21:25,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 331/12323 [33:42<20:21:25,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132690,8 +28790,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11792/12318 [20:24:47<54:38,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11792/12318 [20:24:47<54:38,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 332/12323 [33:46<20:20:10,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 332/12323 [33:46<20:20:10,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132699,8 +28799,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11793/12318 [20:24:52<54:31,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11793/12318 [20:24:52<54:31,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 333/12323 [33:52<20:19:31,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 333/12323 [33:52<20:19:31,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132708,8 +28808,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11794/12318 [20:24:57<54:25,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11794/12318 [20:24:57<54:25,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 334/12323 [34:00<20:20:40,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 334/12323 [34:00<20:20:40,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132717,8 +28817,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11795/12318 [20:24:59<54:19,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11795/12318 [20:24:59<54:19,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 335/12323 [34:04<20:19:09,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 335/12323 [34:04<20:19:09,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132726,8 +28826,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11796/12318 [20:25:05<54:12,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11796/12318 [20:25:05<54:12,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 336/12323 [34:06<20:17:03,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 336/12323 [34:06<20:17:03,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132735,8 +28835,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11797/12318 [20:25:14<54:06,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11797/12318 [20:25:14<54:06,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 337/12323 [34:15<20:18:13,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 337/12323 [34:15<20:18:13,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132744,8 +28844,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11798/12318 [20:25:17<54:00,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11798/12318 [20:25:17<54:00,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 338/12323 [34:21<20:18:15,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 338/12323 [34:21<20:18:15,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132753,8 +28853,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11799/12318 [20:25:23<53:54,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11799/12318 [20:25:23<53:54,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 339/12323 [34:27<20:18:17,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 339/12323 [34:27<20:18:17,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132762,8 +28862,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11800/12318 [20:25:24<53:47,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11800/12318 [20:25:24<53:47,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 340/12323 [34:32<20:17:05,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 340/12323 [34:32<20:17:05,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132771,8 +28871,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11801/12318 [20:25:31<53:41,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11801/12318 [20:25:31<53:41,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 341/12323 [34:33<20:14:10,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 341/12323 [34:33<20:14:10,  6.08s/it, v_num=i2o7, train/loss=5."
      ]
     },
     {
@@ -132780,8 +28880,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11802/12318 [20:25:38<53:35,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11802/12318 [20:25:38<53:35,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 342/12323 [34:39<20:14:13,  6.08s/it, v_num=i2o7, train/loss=5.\r",
+      "Epoch 0:   3%| | 342/12323 [34:39<20:14:14,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132789,8 +28889,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11803/12318 [20:25:42<53:28,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11803/12318 [20:25:42<53:28,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 343/12323 [34:45<20:14:14,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 343/12323 [34:45<20:14:14,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132798,8 +28898,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11804/12318 [20:25:46<53:22,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11804/12318 [20:25:46<53:22,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 344/12323 [34:53<20:14:50,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 344/12323 [34:53<20:14:50,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132807,8 +28907,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11805/12318 [20:25:51<53:16,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11805/12318 [20:25:51<53:16,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 345/12323 [34:59<20:14:52,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 345/12323 [34:59<20:14:52,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132816,8 +28916,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11806/12318 [20:25:54<53:09,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11806/12318 [20:25:54<53:09,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 346/12323 [35:02<20:13:09,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 346/12323 [35:02<20:13:09,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132825,8 +28925,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11807/12318 [20:26:00<53:03,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11807/12318 [20:26:00<53:03,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 347/12323 [35:07<20:12:21,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 347/12323 [35:07<20:12:21,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132834,8 +28934,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11808/12318 [20:27:03<52:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11808/12318 [20:27:03<52:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 348/12323 [35:16<20:13:35,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 348/12323 [35:16<20:13:35,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132843,8 +28943,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11809/12318 [20:27:09<52:53,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11809/12318 [20:27:09<52:53,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 349/12323 [35:19<20:12:11,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 349/12323 [35:19<20:12:11,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132852,8 +28952,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11810/12318 [20:27:17<52:47,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11810/12318 [20:27:17<52:47,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 350/12323 [35:21<20:09:21,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 350/12323 [35:21<20:09:21,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132861,8 +28961,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11811/12318 [20:27:20<52:41,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11811/12318 [20:27:20<52:41,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 351/12323 [35:28<20:09:59,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 351/12323 [35:28<20:09:59,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132870,8 +28970,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11812/12318 [20:27:26<52:34,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11812/12318 [20:27:26<52:34,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 352/12323 [35:55<20:21:34,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 352/12323 [35:55<20:21:34,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132879,8 +28979,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11813/12318 [20:27:30<52:28,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11813/12318 [20:27:30<52:28,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 353/12323 [36:00<20:21:00,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 353/12323 [36:00<20:21:00,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132888,8 +28988,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11814/12318 [20:27:37<52:22,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11814/12318 [20:27:37<52:22,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 354/12323 [36:04<20:19:34,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 354/12323 [36:04<20:19:34,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132897,8 +28997,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11815/12318 [20:27:41<52:15,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11815/12318 [20:27:41<52:15,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 355/12323 [36:08<20:18:41,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 355/12323 [36:08<20:18:41,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132906,8 +29006,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11816/12318 [20:27:44<52:09,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11816/12318 [20:27:44<52:09,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 356/12323 [36:15<20:18:41,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 356/12323 [36:15<20:18:41,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132915,8 +29015,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11817/12318 [20:27:52<52:03,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11817/12318 [20:27:52<52:03,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 357/12323 [36:22<20:19:15,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 357/12323 [36:22<20:19:15,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132924,8 +29024,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11818/12318 [20:28:00<51:57,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11818/12318 [20:28:00<51:57,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 358/12323 [36:28<20:19:16,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 358/12323 [36:28<20:19:16,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132933,8 +29033,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11819/12318 [20:28:06<51:51,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11819/12318 [20:28:06<51:51,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 359/12323 [36:35<20:19:15,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 359/12323 [36:35<20:19:15,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132942,8 +29042,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11820/12318 [20:28:13<51:44,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11820/12318 [20:28:13<51:44,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 360/12323 [36:38<20:17:35,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 360/12323 [36:38<20:17:35,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132951,8 +29051,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11821/12318 [20:28:22<51:38,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11821/12318 [20:28:22<51:38,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 361/12323 [36:46<20:18:43,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 361/12323 [36:46<20:18:43,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132960,8 +29060,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11822/12318 [20:28:27<51:32,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11822/12318 [20:28:27<51:32,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 362/12323 [36:55<20:19:54,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 362/12323 [36:55<20:19:54,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132969,8 +29069,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11823/12318 [20:28:35<51:26,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11823/12318 [20:28:35<51:26,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 363/12323 [36:56<20:17:19,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 363/12323 [36:56<20:17:19,  6.11s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
@@ -132978,8 +29078,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11824/12318 [20:28:43<51:20,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11824/12318 [20:28:43<51:20,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 364/12323 [36:58<20:14:33,  6.09s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   3%| | 364/12323 [36:58<20:14:33,  6.09s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
@@ -132987,8 +29087,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11825/12318 [20:28:52<51:13,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11825/12318 [20:28:52<51:13,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 365/12323 [37:05<20:15:07,  6.10s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   3%| | 365/12323 [37:05<20:15:07,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -132996,8 +29096,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11826/12318 [20:28:54<51:07,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11826/12318 [20:28:54<51:07,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 366/12323 [37:09<20:13:44,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 366/12323 [37:09<20:13:44,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133005,8 +29105,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11827/12318 [20:29:01<51:01,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11827/12318 [20:29:01<51:01,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 367/12323 [37:12<20:12:22,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 367/12323 [37:12<20:12:22,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133014,8 +29114,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11828/12318 [20:29:09<50:55,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11828/12318 [20:29:09<50:55,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 368/12323 [37:17<20:11:32,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 368/12323 [37:17<20:11:32,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133023,8 +29123,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11829/12318 [20:29:17<50:49,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11829/12318 [20:29:17<50:49,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 369/12323 [37:20<20:09:54,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 369/12323 [37:20<20:09:54,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133032,8 +29132,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11830/12318 [20:29:20<50:42,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11830/12318 [20:29:20<50:42,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 370/12323 [37:25<20:08:51,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 370/12323 [37:25<20:08:51,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133041,8 +29141,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11831/12318 [20:29:24<50:36,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11831/12318 [20:29:24<50:36,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 371/12323 [37:28<20:07:14,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 371/12323 [37:28<20:07:14,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133050,8 +29150,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11832/12318 [20:29:28<50:30,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11832/12318 [20:29:28<50:30,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 372/12323 [37:33<20:06:25,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 372/12323 [37:33<20:06:25,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133059,8 +29159,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11833/12318 [20:29:30<50:23,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11833/12318 [20:29:30<50:23,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 373/12323 [37:41<20:07:28,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 373/12323 [37:41<20:07:28,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133068,8 +29168,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11834/12318 [20:29:38<50:17,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11834/12318 [20:29:38<50:17,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 374/12323 [37:45<20:06:07,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 374/12323 [37:45<20:06:07,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133077,8 +29177,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11835/12318 [20:29:45<50:11,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11835/12318 [20:29:45<50:11,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 375/12323 [37:52<20:06:37,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 375/12323 [37:52<20:06:37,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133086,8 +29186,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11836/12318 [20:29:48<50:04,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11836/12318 [20:29:48<50:04,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 376/12323 [37:58<20:06:36,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 376/12323 [37:58<20:06:36,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133095,8 +29195,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11837/12318 [20:29:52<49:58,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11837/12318 [20:29:52<49:58,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 377/12323 [38:05<20:07:05,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 377/12323 [38:05<20:07:05,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133104,8 +29204,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11838/12318 [20:29:59<49:52,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11838/12318 [20:29:59<49:52,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 378/12323 [38:07<20:04:57,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 378/12323 [38:07<20:04:57,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133113,8 +29213,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11839/12318 [20:30:06<49:46,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11839/12318 [20:30:06<49:46,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 379/12323 [38:15<20:05:27,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 379/12323 [38:15<20:05:27,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133122,8 +29222,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11840/12318 [20:30:22<49:40,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11840/12318 [20:30:22<49:40,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 380/12323 [38:21<20:05:27,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 380/12323 [38:21<20:05:27,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133131,8 +29231,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11841/12318 [20:30:31<49:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11841/12318 [20:30:31<49:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 381/12323 [38:24<20:04:07,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 381/12323 [38:24<20:04:07,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133140,8 +29240,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11842/12318 [20:30:36<49:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11842/12318 [20:30:36<49:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 382/12323 [38:32<20:04:38,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 382/12323 [38:32<20:04:38,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133149,8 +29249,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11843/12318 [20:30:38<49:21,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11843/12318 [20:30:38<49:21,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 383/12323 [38:35<20:03:03,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 383/12323 [38:35<20:03:03,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133158,8 +29258,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11844/12318 [20:30:46<49:15,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11844/12318 [20:30:46<49:15,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 384/12323 [39:12<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 384/12323 [39:12<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133167,8 +29267,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11845/12318 [20:30:52<49:09,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11845/12318 [20:30:52<49:09,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 385/12323 [39:18<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 385/12323 [39:18<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133176,8 +29276,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11846/12318 [20:30:58<49:02,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11846/12318 [20:30:58<49:02,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 386/12323 [39:27<20:20:10,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 386/12323 [39:27<20:20:10,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133185,8 +29285,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11847/12318 [20:31:03<48:56,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11847/12318 [20:31:03<48:56,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 387/12323 [39:33<20:20:09,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 387/12323 [39:33<20:20:09,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133194,8 +29294,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11848/12318 [20:31:10<48:50,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11848/12318 [20:31:10<48:50,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 388/12323 [39:41<20:21:08,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 388/12323 [39:41<20:21:09,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133203,8 +29303,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11849/12318 [20:31:13<48:44,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11849/12318 [20:31:13<48:44,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 389/12323 [39:47<20:20:35,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 389/12323 [39:47<20:20:36,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133212,8 +29312,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11850/12318 [20:31:22<48:37,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11850/12318 [20:31:22<48:37,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 390/12323 [39:53<20:20:36,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 390/12323 [39:53<20:20:36,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133221,8 +29321,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11851/12318 [20:31:27<48:31,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11851/12318 [20:31:27<48:31,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 391/12323 [39:58<20:20:04,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 391/12323 [39:58<20:20:04,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133230,8 +29330,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11852/12318 [20:31:35<48:25,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11852/12318 [20:31:35<48:25,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 392/12323 [40:06<20:20:32,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 392/12323 [40:06<20:20:32,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133239,8 +29339,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11853/12318 [20:31:44<48:19,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11853/12318 [20:31:44<48:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 393/12323 [40:11<20:20:01,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 393/12323 [40:11<20:20:01,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133248,8 +29348,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11854/12318 [20:31:52<48:13,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11854/12318 [20:31:52<48:13,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 394/12323 [40:16<20:19:28,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 394/12323 [40:16<20:19:28,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133257,8 +29357,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11855/12318 [20:31:59<48:06,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11855/12318 [20:31:59<48:06,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 395/12323 [40:21<20:18:28,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 395/12323 [40:21<20:18:28,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133266,8 +29366,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11856/12318 [20:32:04<48:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11856/12318 [20:32:04<48:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 396/12323 [40:27<20:18:34,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 396/12323 [40:27<20:18:34,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133275,7 +29375,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11857/12318 [20:32:11<47:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 397/12323 [40:34<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 397/12323 [40:34<20:19:06,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133283,7 +29384,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11857/12318 [20:32:11<47:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 398/12323 [40:42<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 398/12323 [40:42<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133291,8 +29393,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11858/12318 [20:32:19<47:48,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11858/12318 [20:32:19<47:48,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 399/12323 [40:48<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 399/12323 [40:48<20:19:40,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133300,7 +29402,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11859/12318 [20:32:26<47:42,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 400/12323 [40:57<20:20:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 400/12323 [40:57<20:20:39,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133308,7 +29411,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11859/12318 [20:32:26<47:42,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 401/12323 [41:03<20:20:40,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 401/12323 [41:03<20:20:40,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133316,8 +29420,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11860/12318 [20:32:31<47:35,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11860/12318 [20:32:31<47:35,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 402/12323 [41:10<20:21:10,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 402/12323 [41:10<20:21:10,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133325,8 +29429,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11861/12318 [20:32:40<47:29,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11861/12318 [20:32:40<47:29,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 403/12323 [41:18<20:21:39,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 403/12323 [41:18<20:21:39,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133334,8 +29438,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11862/12318 [20:32:45<47:23,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11862/12318 [20:32:45<47:23,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 404/12323 [41:20<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 404/12323 [41:20<20:19:39,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133343,8 +29447,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11863/12318 [20:32:50<47:17,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11863/12318 [20:32:50<47:17,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 405/12323 [41:27<20:20:07,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 405/12323 [41:27<20:20:07,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133352,8 +29456,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11864/12318 [20:32:55<47:10,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11864/12318 [20:32:55<47:10,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 406/12323 [41:36<20:21:08,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 406/12323 [41:36<20:21:08,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133361,8 +29465,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11865/12318 [20:33:01<47:04,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11865/12318 [20:33:01<47:04,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 407/12323 [41:38<20:19:08,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 407/12323 [41:38<20:19:08,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133370,8 +29474,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11866/12318 [20:33:10<46:58,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11866/12318 [20:33:10<46:58,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 408/12323 [41:43<20:18:39,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 408/12323 [41:43<20:18:39,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133379,8 +29483,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11867/12318 [20:33:13<46:52,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11867/12318 [20:33:13<46:52,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 409/12323 [41:45<20:16:26,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 409/12323 [41:45<20:16:26,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133388,8 +29492,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11868/12318 [20:33:21<46:45,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11868/12318 [20:33:21<46:45,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 410/12323 [41:48<20:14:43,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 410/12323 [41:48<20:14:43,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133397,8 +29501,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11869/12318 [20:33:29<46:39,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11869/12318 [20:33:29<46:39,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 411/12323 [41:53<20:14:15,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 411/12323 [41:53<20:14:15,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133406,8 +29510,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11870/12318 [20:33:37<46:33,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11870/12318 [20:33:37<46:33,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 412/12323 [41:56<20:12:33,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 412/12323 [41:56<20:12:33,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133415,8 +29519,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11871/12318 [20:33:43<46:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11871/12318 [20:33:43<46:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 413/12323 [41:59<20:11:05,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133424,8 +29527,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11872/12318 [20:34:00<46:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11872/12318 [20:34:00<46:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 413/12323 [41:59<20:11:05,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133433,8 +29535,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11873/12318 [20:34:07<46:15,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11873/12318 [20:34:07<46:15,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 414/12323 [42:06<20:11:05,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 414/12323 [42:06<20:11:05,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133442,8 +29544,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11874/12318 [20:34:11<46:08,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11874/12318 [20:34:11<46:08,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 415/12323 [42:14<20:12:02,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 415/12323 [42:14<20:12:02,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133451,8 +29553,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11875/12318 [20:34:19<46:02,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11875/12318 [20:34:19<46:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 416/12323 [42:31<20:17:00,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 416/12323 [42:31<20:17:00,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133460,8 +29562,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11876/12318 [20:34:25<45:56,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11876/12318 [20:34:25<45:56,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 417/12323 [42:39<20:17:58,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 417/12323 [42:39<20:17:59,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133469,8 +29571,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11877/12318 [20:34:29<45:50,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11877/12318 [20:34:29<45:50,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 418/12323 [42:41<20:16:03,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 418/12323 [42:41<20:16:03,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133478,8 +29580,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11878/12318 [20:34:32<45:43,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11878/12318 [20:34:32<45:43,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 419/12323 [42:46<20:15:19,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 419/12323 [42:46<20:15:19,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133487,8 +29589,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11879/12318 [20:34:40<45:37,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11879/12318 [20:34:40<45:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 420/12323 [42:49<20:13:38,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 420/12323 [42:49<20:13:38,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133496,8 +29598,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11880/12318 [20:34:48<45:31,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11880/12318 [20:34:48<45:31,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 421/12323 [42:56<20:14:08,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 421/12323 [42:56<20:14:08,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133505,8 +29607,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11881/12318 [20:34:51<45:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11881/12318 [20:34:51<45:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 422/12323 [43:05<20:15:03,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 422/12323 [43:05<20:15:03,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133514,8 +29616,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11882/12318 [20:34:56<45:18,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11882/12318 [20:34:56<45:18,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 423/12323 [43:11<20:15:05,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 423/12323 [43:11<20:15:05,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133523,8 +29625,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11883/12318 [20:34:59<45:12,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11883/12318 [20:34:59<45:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 424/12323 [43:18<20:15:31,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 424/12323 [43:18<20:15:31,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133532,8 +29634,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11884/12318 [20:35:06<45:06,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11884/12318 [20:35:06<45:06,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 425/12323 [43:23<20:14:34,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 425/12323 [43:23<20:14:34,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133541,8 +29643,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11885/12318 [20:35:11<45:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11885/12318 [20:35:11<45:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 426/12323 [43:26<20:13:09,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 426/12323 [43:26<20:13:09,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133550,8 +29652,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  96%|▉| 11886/12318 [20:35:17<44:53,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  96%|▉| 11886/12318 [20:35:17<44:53,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 427/12323 [43:31<20:12:25,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 427/12323 [43:31<20:12:25,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133559,8 +29661,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11887/12318 [20:35:24<44:47,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11887/12318 [20:35:24<44:47,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 428/12323 [43:36<20:11:56,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 428/12323 [43:36<20:11:56,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133568,8 +29670,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11888/12318 [20:35:26<44:41,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11888/12318 [20:35:26<44:41,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 429/12323 [43:39<20:10:18,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 429/12323 [43:39<20:10:18,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133577,8 +29679,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11889/12318 [20:35:35<44:35,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11889/12318 [20:35:35<44:35,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 430/12323 [43:43<20:09:21,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 430/12323 [43:43<20:09:21,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133586,8 +29688,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11890/12318 [20:35:42<44:28,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11890/12318 [20:35:42<44:28,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   3%| | 431/12323 [43:49<20:09:21,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   3%| | 431/12323 [43:49<20:09:21,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133595,8 +29697,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11891/12318 [20:35:47<44:22,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11891/12318 [20:35:47<44:22,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 432/12323 [43:55<20:08:51,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 432/12323 [43:55<20:08:51,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133604,8 +29706,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11892/12318 [20:35:50<44:16,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11892/12318 [20:35:50<44:16,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 433/12323 [44:01<20:08:49,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 433/12323 [44:01<20:08:49,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133613,8 +29715,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11893/12318 [20:35:56<44:10,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11893/12318 [20:35:56<44:10,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 434/12323 [44:08<20:09:19,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 434/12323 [44:08<20:09:19,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133622,8 +29724,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11894/12318 [20:36:05<44:03,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11894/12318 [20:36:05<44:03,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 435/12323 [44:16<20:09:46,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 435/12323 [44:16<20:09:46,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133631,8 +29733,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11895/12318 [20:36:12<43:57,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11895/12318 [20:36:12<43:57,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 436/12323 [44:18<20:08:10,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 436/12323 [44:18<20:08:10,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133640,8 +29742,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11896/12318 [20:36:17<43:51,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11896/12318 [20:36:17<43:51,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 437/12323 [44:20<20:05:52,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 437/12323 [44:20<20:05:52,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133649,8 +29751,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11897/12318 [20:36:21<43:45,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11897/12318 [20:36:21<43:45,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 438/12323 [44:23<20:04:44,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 438/12323 [44:23<20:04:44,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133658,8 +29760,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11898/12318 [20:36:27<43:38,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11898/12318 [20:36:27<43:38,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 439/12323 [44:26<20:02:55,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 439/12323 [44:26<20:02:55,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133667,8 +29769,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11899/12318 [20:36:33<43:32,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11899/12318 [20:36:33<43:32,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 440/12323 [44:32<20:02:54,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 440/12323 [44:32<20:02:54,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133676,8 +29778,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11900/12318 [20:36:35<43:26,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11900/12318 [20:36:35<43:26,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 441/12323 [44:39<20:03:21,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 441/12323 [44:39<20:03:21,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133685,8 +29787,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11901/12318 [20:36:39<43:19,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11901/12318 [20:36:39<43:19,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 442/12323 [44:40<20:01:05,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 442/12323 [44:40<20:01:05,  6.07s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
@@ -133694,8 +29796,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11902/12318 [20:36:41<43:13,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11902/12318 [20:36:41<43:13,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 443/12323 [44:49<20:01:58,  6.07s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   4%| | 443/12323 [44:49<20:01:58,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133703,8 +29805,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11903/12318 [20:36:42<43:07,  6.23s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11903/12318 [20:36:42<43:07,  6.23s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 444/12323 [44:56<20:02:25,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 444/12323 [44:56<20:02:25,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133712,8 +29814,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11904/12318 [20:37:22<43:02,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11904/12318 [20:37:22<43:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 445/12323 [45:01<20:01:43,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 445/12323 [45:01<20:01:43,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133721,8 +29823,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11905/12318 [20:37:29<42:55,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11905/12318 [20:37:29<42:55,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 446/12323 [45:06<20:01:16,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 446/12323 [45:06<20:01:16,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133730,8 +29832,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11906/12318 [20:37:33<42:49,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11906/12318 [20:37:33<42:49,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 447/12323 [45:11<20:00:36,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 447/12323 [45:11<20:00:36,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133739,8 +29841,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11907/12318 [20:37:38<42:43,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11907/12318 [20:37:38<42:43,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 448/12323 [45:52<20:16:08,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 448/12323 [45:52<20:16:08,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133748,8 +29850,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11908/12318 [20:37:45<42:37,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11908/12318 [20:37:45<42:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 449/12323 [45:59<20:16:08,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 449/12323 [45:59<20:16:08,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133757,8 +29859,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11909/12318 [20:37:51<42:30,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11909/12318 [20:37:51<42:30,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 450/12323 [46:00<20:14:07,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 450/12323 [46:00<20:14:07,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133766,7 +29868,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11910/12318 [20:37:54<42:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 451/12323 [46:04<20:12:46,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133774,7 +29876,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11910/12318 [20:37:54<42:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 451/12323 [46:04<20:12:46,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133782,8 +29884,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11911/12318 [20:38:03<42:18,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11911/12318 [20:38:03<42:18,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 452/12323 [46:10<20:12:46,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 452/12323 [46:10<20:12:46,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133791,8 +29893,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11912/12318 [20:38:10<42:12,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11912/12318 [20:38:10<42:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 453/12323 [46:15<20:12:18,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 453/12323 [46:15<20:12:18,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133800,8 +29902,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11913/12318 [20:38:15<42:05,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11913/12318 [20:38:15<42:05,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 454/12323 [46:19<20:10:58,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 454/12323 [46:19<20:10:58,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133809,8 +29911,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11914/12318 [20:38:18<41:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11914/12318 [20:38:18<41:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 455/12323 [46:23<20:09:52,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 455/12323 [46:23<20:09:52,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133818,8 +29920,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11915/12318 [20:38:21<41:53,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11915/12318 [20:38:21<41:53,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 456/12323 [46:26<20:08:32,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 456/12323 [46:26<20:08:32,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133827,8 +29929,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11916/12318 [20:38:27<41:46,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11916/12318 [20:38:27<41:46,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 457/12323 [46:27<20:06:21,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 457/12323 [46:27<20:06:21,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133836,8 +29938,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11917/12318 [20:38:30<41:40,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11917/12318 [20:38:30<41:40,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 458/12323 [46:32<20:05:54,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 458/12323 [46:32<20:05:54,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133845,8 +29947,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11918/12318 [20:38:35<41:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11918/12318 [20:38:35<41:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 459/12323 [46:41<20:06:44,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 459/12323 [46:41<20:06:44,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133854,8 +29956,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11919/12318 [20:38:40<41:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11919/12318 [20:38:40<41:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 460/12323 [46:46<20:06:17,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 460/12323 [46:46<20:06:17,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133863,8 +29965,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11920/12318 [20:38:46<41:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11920/12318 [20:38:46<41:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 461/12323 [46:49<20:04:46,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 461/12323 [46:49<20:04:46,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133872,8 +29974,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11921/12318 [20:38:54<41:15,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11921/12318 [20:38:54<41:15,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 462/12323 [46:53<20:03:54,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 462/12323 [46:53<20:03:54,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133881,8 +29983,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11922/12318 [20:38:59<41:09,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11922/12318 [20:38:59<41:09,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 463/12323 [46:58<20:03:27,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 463/12323 [46:58<20:03:27,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133890,8 +29992,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11923/12318 [20:39:05<41:03,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11923/12318 [20:39:05<41:03,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 464/12323 [47:04<20:03:02,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 464/12323 [47:04<20:03:02,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133899,8 +30001,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11924/12318 [20:39:14<40:56,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11924/12318 [20:39:14<40:56,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 465/12323 [47:10<20:03:06,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 465/12323 [47:10<20:03:06,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133908,8 +30010,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11925/12318 [20:39:21<40:50,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11925/12318 [20:39:21<40:50,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 466/12323 [47:17<20:03:06,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 466/12323 [47:17<20:03:06,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133917,8 +30019,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11926/12318 [20:39:28<40:44,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11926/12318 [20:39:28<40:44,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 467/12323 [47:22<20:02:40,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 467/12323 [47:22<20:02:40,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133926,8 +30028,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11927/12318 [20:39:35<40:38,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11927/12318 [20:39:35<40:38,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 468/12323 [47:25<20:01:23,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 468/12323 [47:25<20:01:23,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133935,8 +30037,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11928/12318 [20:39:40<40:31,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11928/12318 [20:39:40<40:31,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 469/12323 [47:27<19:59:41,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 469/12323 [47:27<19:59:41,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133944,8 +30046,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11929/12318 [20:39:49<40:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11929/12318 [20:39:49<40:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 470/12323 [47:34<19:59:43,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 470/12323 [47:34<19:59:43,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133953,8 +30055,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11930/12318 [20:39:53<40:19,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11930/12318 [20:39:53<40:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 471/12323 [47:35<19:57:36,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 471/12323 [47:35<19:57:36,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133962,8 +30064,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11931/12318 [20:39:58<40:13,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11931/12318 [20:39:58<40:13,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 472/12323 [47:41<19:57:37,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 472/12323 [47:41<19:57:37,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133971,8 +30073,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11932/12318 [20:40:05<40:07,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11932/12318 [20:40:05<40:07,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 473/12323 [47:47<19:57:14,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 473/12323 [47:47<19:57:14,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133980,8 +30082,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11933/12318 [20:40:13<40:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11933/12318 [20:40:13<40:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 474/12323 [47:52<19:56:36,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 474/12323 [47:52<19:56:36,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133989,7 +30091,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11934/12318 [20:40:19<39:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 475/12323 [47:56<19:55:46,  6.06s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 475/12323 [47:56<19:55:46,  6.06s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -133997,7 +30100,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11934/12318 [20:40:19<39:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 476/12323 [47:57<19:53:41,  6.05s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 476/12323 [47:57<19:53:41,  6.05s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
@@ -134005,8 +30109,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11935/12318 [20:40:28<39:48,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11935/12318 [20:40:28<39:48,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 477/12323 [48:04<19:53:44,  6.05s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   4%| | 477/12323 [48:04<19:53:44,  6.05s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134014,8 +30118,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11936/12318 [20:41:01<39:43,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11936/12318 [20:41:01<39:43,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 478/12323 [48:09<19:53:19,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 478/12323 [48:09<19:53:19,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134023,8 +30127,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11937/12318 [20:41:08<39:36,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11937/12318 [20:41:08<39:36,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 479/12323 [48:13<19:52:30,  6.04s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 479/12323 [48:13<19:52:30,  6.04s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134032,8 +30136,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11938/12318 [20:41:13<39:30,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11938/12318 [20:41:13<39:30,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 480/12323 [49:16<20:15:33,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 480/12323 [49:16<20:15:33,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134041,8 +30145,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11939/12318 [20:41:21<39:24,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11939/12318 [20:41:21<39:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 481/12323 [49:21<20:15:08,  6.16s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 481/12323 [49:21<20:15:08,  6.16s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134050,8 +30154,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11940/12318 [20:41:29<39:18,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11940/12318 [20:41:29<39:18,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 482/12323 [49:23<20:13:14,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 482/12323 [49:23<20:13:14,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134059,8 +30163,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11941/12318 [20:41:34<39:11,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11941/12318 [20:41:34<39:11,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 483/12323 [49:27<20:12:11,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 483/12323 [49:27<20:12:11,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134068,8 +30172,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11942/12318 [20:41:41<39:05,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11942/12318 [20:41:41<39:05,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 484/12323 [49:30<20:10:55,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 484/12323 [49:30<20:10:55,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134077,8 +30181,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11943/12318 [20:41:45<38:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11943/12318 [20:41:45<38:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 485/12323 [49:35<20:10:14,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 485/12323 [49:35<20:10:14,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134086,8 +30190,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11944/12318 [20:41:49<38:53,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11944/12318 [20:41:49<38:53,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 486/12323 [49:38<20:08:58,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 486/12323 [49:38<20:08:58,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134095,8 +30199,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11945/12318 [20:41:57<38:46,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11945/12318 [20:41:57<38:46,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 487/12323 [49:41<20:07:43,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 487/12323 [49:41<20:07:43,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134104,7 +30208,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11946/12318 [20:42:01<38:40,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 488/12323 [49:50<20:08:34,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 488/12323 [49:50<20:08:34,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134112,7 +30217,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11946/12318 [20:42:01<38:40,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 489/12323 [49:52<20:07:08,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 489/12323 [49:52<20:07:08,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134120,8 +30226,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11947/12318 [20:42:05<38:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11947/12318 [20:42:05<38:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 490/12323 [49:57<20:06:33,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 490/12323 [49:57<20:06:33,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134129,8 +30235,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11948/12318 [20:42:06<38:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11948/12318 [20:42:06<38:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 491/12323 [50:03<20:06:07,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 491/12323 [50:03<20:06:07,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134138,8 +30244,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11949/12318 [20:42:12<38:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11949/12318 [20:42:12<38:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 492/12323 [50:06<20:04:54,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 492/12323 [50:06<20:04:54,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134147,8 +30253,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11950/12318 [20:42:19<38:15,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11950/12318 [20:42:19<38:15,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 493/12323 [50:13<20:05:20,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 493/12323 [50:13<20:05:20,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134156,8 +30262,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11951/12318 [20:42:28<38:09,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11951/12318 [20:42:28<38:09,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 494/12323 [50:19<20:04:57,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 494/12323 [50:19<20:04:57,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134165,8 +30271,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11952/12318 [20:42:31<38:02,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11952/12318 [20:42:31<38:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 495/12323 [50:26<20:05:19,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 495/12323 [50:26<20:05:19,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134174,8 +30280,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11953/12318 [20:42:35<37:56,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11953/12318 [20:42:35<37:56,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 496/12323 [50:31<20:04:54,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 496/12323 [50:31<20:04:54,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134183,8 +30289,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11954/12318 [20:42:41<37:50,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11954/12318 [20:42:41<37:50,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 497/12323 [50:40<20:05:41,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 497/12323 [50:40<20:05:41,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134192,8 +30298,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11955/12318 [20:42:45<37:44,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11955/12318 [20:42:45<37:44,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 498/12323 [50:44<20:04:52,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 498/12323 [50:44<20:04:52,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134201,8 +30307,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11956/12318 [20:42:49<37:37,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11956/12318 [20:42:49<37:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 499/12323 [50:45<20:02:51,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 499/12323 [50:45<20:02:51,  6.10s/it, v_num=i2o7, train/loss=2."
      ]
     },
     {
@@ -134210,8 +30316,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11957/12318 [20:42:53<37:31,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11957/12318 [20:42:53<37:31,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 500/12323 [50:53<20:03:14,  6.11s/it, v_num=i2o7, train/loss=2.\r",
+      "Epoch 0:   4%| | 500/12323 [50:53<20:03:14,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134219,8 +30325,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11958/12318 [20:43:01<37:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11958/12318 [20:43:01<37:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 501/12323 [50:55<20:01:37,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 501/12323 [50:55<20:01:37,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134228,8 +30334,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11959/12318 [20:43:08<37:19,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11959/12318 [20:43:08<37:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 502/12323 [51:03<20:02:22,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 502/12323 [51:03<20:02:22,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134237,7 +30343,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11960/12318 [20:43:14<37:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 503/12323 [51:04<20:00:23,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 503/12323 [51:04<20:00:23,  6.09s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
@@ -134245,7 +30352,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11960/12318 [20:43:14<37:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 504/12323 [51:11<20:00:22,  6.09s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   4%| | 504/12323 [51:11<20:00:22,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134253,8 +30361,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11961/12318 [20:43:22<37:06,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11961/12318 [20:43:22<37:06,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 505/12323 [51:18<20:00:43,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 505/12323 [51:18<20:00:43,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134262,8 +30370,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11962/12318 [20:43:30<37:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11962/12318 [20:43:30<37:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 506/12323 [51:26<20:01:30,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 506/12323 [51:26<20:01:30,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134271,8 +30379,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11963/12318 [20:43:35<36:54,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11963/12318 [20:43:35<36:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 507/12323 [51:31<20:00:41,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 507/12323 [51:31<20:00:41,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134280,8 +30388,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11964/12318 [20:43:39<36:47,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11964/12318 [20:43:39<36:47,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 508/12323 [51:33<19:59:05,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 508/12323 [51:33<19:59:05,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134289,8 +30397,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11965/12318 [20:43:43<36:41,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11965/12318 [20:43:43<36:41,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 509/12323 [51:39<19:59:05,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 509/12323 [51:39<19:59:05,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134298,8 +30406,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11966/12318 [20:43:46<36:35,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11966/12318 [20:43:46<36:35,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 510/12323 [51:48<19:59:51,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 510/12323 [51:48<19:59:51,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134307,8 +30415,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11967/12318 [20:43:53<36:29,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11967/12318 [20:43:53<36:29,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 511/12323 [51:55<20:00:13,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 511/12323 [51:55<20:00:13,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134316,8 +30424,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11968/12318 [20:44:32<36:23,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11968/12318 [20:44:32<36:23,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 512/12323 [52:26<20:09:36,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 512/12323 [52:26<20:09:36,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134325,8 +30433,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11969/12318 [20:44:40<36:17,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11969/12318 [20:44:40<36:17,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 513/12323 [52:33<20:09:58,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 513/12323 [52:33<20:09:58,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134334,8 +30442,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11970/12318 [20:44:48<36:11,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11970/12318 [20:44:48<36:11,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 514/12323 [52:38<20:09:34,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 514/12323 [52:38<20:09:34,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134343,8 +30451,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11971/12318 [20:44:54<36:05,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11971/12318 [20:44:54<36:05,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 515/12323 [52:41<20:08:10,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 515/12323 [52:41<20:08:10,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134352,8 +30460,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11972/12318 [20:45:01<35:58,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11972/12318 [20:45:01<35:58,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 516/12323 [52:46<20:07:44,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 516/12323 [52:46<20:07:44,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134361,8 +30469,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11973/12318 [20:45:03<35:52,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11973/12318 [20:45:03<35:52,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 517/12323 [52:55<20:08:28,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 517/12323 [52:55<20:08:28,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134370,8 +30478,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11974/12318 [20:45:07<35:46,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11974/12318 [20:45:07<35:46,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 518/12323 [53:03<20:09:09,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 518/12323 [53:03<20:09:09,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134379,8 +30487,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11975/12318 [20:45:16<35:40,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11975/12318 [20:45:16<35:40,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 519/12323 [53:06<20:07:58,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 519/12323 [53:06<20:07:58,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134388,8 +30496,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11976/12318 [20:45:23<35:33,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11976/12318 [20:45:23<35:33,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 520/12323 [53:10<20:06:57,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 520/12323 [53:10<20:06:57,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134397,8 +30505,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11977/12318 [20:45:28<35:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11977/12318 [20:45:28<35:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 521/12323 [53:16<20:06:52,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 521/12323 [53:16<20:06:52,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134406,8 +30514,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11978/12318 [20:45:33<35:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11978/12318 [20:45:33<35:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 522/12323 [53:24<20:07:33,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 522/12323 [53:24<20:07:33,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134415,8 +30523,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11979/12318 [20:45:39<35:15,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11979/12318 [20:45:39<35:15,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 523/12323 [53:31<20:07:30,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 523/12323 [53:31<20:07:30,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134424,8 +30532,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11980/12318 [20:45:43<35:08,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11980/12318 [20:45:43<35:08,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 524/12323 [53:34<20:06:20,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 524/12323 [53:34<20:06:20,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134433,8 +30541,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11981/12318 [20:45:50<35:02,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11981/12318 [20:45:50<35:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 525/12323 [53:42<20:07:04,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 525/12323 [53:42<20:07:04,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134442,8 +30550,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11982/12318 [20:45:58<34:56,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11982/12318 [20:45:58<34:56,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 526/12323 [53:50<20:07:25,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 526/12323 [53:50<20:07:25,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134451,8 +30559,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11983/12318 [20:46:04<34:50,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11983/12318 [20:46:04<34:50,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 527/12323 [53:53<20:06:05,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 527/12323 [53:53<20:06:05,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134460,8 +30568,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11984/12318 [20:46:09<34:43,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11984/12318 [20:46:09<34:43,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 528/12323 [53:54<20:04:10,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 528/12323 [53:54<20:04:10,  6.13s/it, v_num=i2o7, train/loss=2."
      ]
     },
     {
@@ -134469,8 +30577,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11985/12318 [20:46:16<34:37,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11985/12318 [20:46:16<34:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 529/12323 [53:58<20:03:12,  6.12s/it, v_num=i2o7, train/loss=2.\r",
+      "Epoch 0:   4%| | 529/12323 [53:58<20:03:12,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134478,8 +30586,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11986/12318 [20:46:23<34:31,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11986/12318 [20:46:23<34:31,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 530/12323 [54:05<20:03:34,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 530/12323 [54:05<20:03:35,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134487,8 +30595,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11987/12318 [20:46:31<34:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11987/12318 [20:46:31<34:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 531/12323 [54:11<20:03:35,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 531/12323 [54:11<20:03:35,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134496,8 +30604,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11988/12318 [20:46:33<34:18,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11988/12318 [20:46:33<34:18,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 532/12323 [54:15<20:02:26,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 532/12323 [54:15<20:02:26,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134505,8 +30613,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11989/12318 [20:46:37<34:12,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11989/12318 [20:46:37<34:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 533/12323 [54:19<20:01:29,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 533/12323 [54:19<20:01:29,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134514,8 +30622,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11990/12318 [20:46:41<34:06,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11990/12318 [20:46:41<34:06,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 534/12323 [54:23<20:00:54,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 534/12323 [54:23<20:00:54,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134523,8 +30631,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11991/12318 [20:46:43<33:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11991/12318 [20:46:43<33:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 535/12323 [54:25<19:59:02,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 535/12323 [54:25<19:59:02,  6.10s/it, v_num=i2o7, train/loss=3."
      ]
     },
     {
@@ -134532,8 +30640,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11992/12318 [20:46:46<33:53,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11992/12318 [20:46:46<33:53,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 536/12323 [54:28<19:57:55,  6.10s/it, v_num=i2o7, train/loss=3.\r",
+      "Epoch 0:   4%| | 536/12323 [54:28<19:57:55,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134541,8 +30649,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11993/12318 [20:46:52<33:47,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11993/12318 [20:46:52<33:47,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 537/12323 [54:29<19:56:03,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 537/12323 [54:29<19:56:03,  6.09s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
@@ -134550,8 +30658,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11994/12318 [20:47:00<33:41,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11994/12318 [20:47:00<33:41,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 538/12323 [54:37<19:56:25,  6.09s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   4%| | 538/12323 [54:37<19:56:25,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134559,8 +30667,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11995/12318 [20:47:08<33:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11995/12318 [20:47:08<33:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 539/12323 [54:40<19:55:30,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 539/12323 [54:40<19:55:30,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134568,8 +30676,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11996/12318 [20:47:15<33:28,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11996/12318 [20:47:15<33:28,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 540/12323 [54:43<19:54:12,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 540/12323 [54:43<19:54:12,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134577,8 +30685,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11997/12318 [20:47:23<33:22,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11997/12318 [20:47:23<33:22,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 541/12323 [54:46<19:52:55,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 541/12323 [54:46<19:52:55,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134586,8 +30694,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11998/12318 [20:47:31<33:16,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11998/12318 [20:47:31<33:16,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 542/12323 [54:49<19:51:48,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 542/12323 [54:49<19:51:48,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134595,8 +30703,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 11999/12318 [20:47:38<33:10,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 11999/12318 [20:47:38<33:10,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 543/12323 [54:57<19:52:10,  6.07s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 543/12323 [54:57<19:52:11,  6.07s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134604,8 +30712,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12000/12318 [20:47:54<33:04,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12000/12318 [20:47:54<33:04,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 544/12323 [55:44<20:07:02,  6.15s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 544/12323 [55:44<20:07:02,  6.15s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134613,8 +30721,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12001/12318 [20:48:22<32:58,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12001/12318 [20:48:22<32:58,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 545/12323 [55:46<20:05:23,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 545/12323 [55:46<20:05:23,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134622,8 +30730,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12002/12318 [20:48:26<32:52,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12002/12318 [20:48:26<32:52,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 546/12323 [55:55<20:06:08,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 546/12323 [55:55<20:06:08,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134631,8 +30739,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12003/12318 [20:48:29<32:45,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12003/12318 [20:48:29<32:45,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 547/12323 [55:58<20:05:01,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 547/12323 [55:58<20:05:01,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134640,8 +30748,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12004/12318 [20:48:30<32:39,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12004/12318 [20:48:30<32:39,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 548/12323 [56:04<20:05:01,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 548/12323 [56:04<20:05:01,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134649,8 +30757,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12005/12318 [20:48:38<32:33,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12005/12318 [20:48:38<32:33,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 549/12323 [56:08<20:03:54,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 549/12323 [56:08<20:03:54,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134658,8 +30766,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12006/12318 [20:48:45<32:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12006/12318 [20:48:45<32:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 550/12323 [56:14<20:03:54,  6.14s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 550/12323 [56:14<20:03:54,  6.14s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134667,7 +30775,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12007/12318 [20:48:48<32:20,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 551/12323 [56:19<20:03:20,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 551/12323 [56:19<20:03:20,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134675,7 +30784,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12007/12318 [20:48:48<32:20,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 552/12323 [56:22<20:02:14,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 552/12323 [56:22<20:02:14,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134683,8 +30793,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12008/12318 [20:48:52<32:14,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12008/12318 [20:48:52<32:14,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 553/12323 [56:27<20:01:41,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 553/12323 [56:27<20:01:41,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134692,8 +30802,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12009/12318 [20:48:58<32:08,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12009/12318 [20:48:58<32:08,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   4%| | 554/12323 [56:35<20:02:04,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   4%| | 554/12323 [56:35<20:02:04,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134701,8 +30811,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  97%|▉| 12010/12318 [20:49:01<32:01,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  97%|▉| 12010/12318 [20:49:01<32:01,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 555/12323 [56:39<20:01:31,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134710,8 +30819,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12011/12318 [20:49:05<31:55,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12011/12318 [20:49:05<31:55,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 555/12323 [56:39<20:01:31,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134719,8 +30827,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12012/12318 [20:49:08<31:49,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12012/12318 [20:49:08<31:49,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 556/12323 [56:44<20:00:47,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 556/12323 [56:44<20:00:47,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134728,8 +30836,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12013/12318 [20:49:17<31:43,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12013/12318 [20:49:17<31:43,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 557/12323 [56:51<20:01:09,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 557/12323 [56:51<20:01:09,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134737,8 +30845,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12014/12318 [20:49:25<31:36,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12014/12318 [20:49:25<31:36,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 558/12323 [57:00<20:01:52,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 558/12323 [57:00<20:01:52,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134746,8 +30854,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12015/12318 [20:49:30<31:30,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12015/12318 [20:49:30<31:30,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 559/12323 [57:06<20:01:51,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 559/12323 [57:06<20:01:51,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134755,7 +30863,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12016/12318 [20:49:35<31:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 560/12323 [57:10<20:01:08,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 560/12323 [57:10<20:01:08,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134763,7 +30872,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12016/12318 [20:49:35<31:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 561/12323 [57:15<20:00:39,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 561/12323 [57:15<20:00:39,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134771,8 +30881,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12017/12318 [20:49:40<31:18,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12017/12318 [20:49:40<31:18,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 562/12323 [57:20<19:59:57,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 562/12323 [57:20<19:59:57,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134780,8 +30890,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12018/12318 [20:49:44<31:11,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12018/12318 [20:49:44<31:11,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 563/12323 [57:28<20:00:41,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 563/12323 [57:28<20:00:41,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134789,8 +30899,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12019/12318 [20:49:52<31:05,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12019/12318 [20:49:52<31:05,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 564/12323 [57:37<20:01:25,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 564/12323 [57:37<20:01:25,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134798,8 +30908,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12020/12318 [20:49:58<30:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12020/12318 [20:49:58<30:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 565/12323 [57:41<20:00:32,  6.13s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 565/12323 [57:41<20:00:32,  6.13s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134807,8 +30917,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12021/12318 [20:50:01<30:53,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12021/12318 [20:50:01<30:53,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 566/12323 [57:43<19:58:56,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 566/12323 [57:43<19:58:56,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134816,8 +30926,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12022/12318 [20:50:09<30:46,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12022/12318 [20:50:09<30:46,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 567/12323 [57:45<19:57:30,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 567/12323 [57:45<19:57:30,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134825,8 +30935,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12023/12318 [20:50:18<30:40,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12023/12318 [20:50:18<30:40,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 568/12323 [57:48<19:56:14,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 568/12323 [57:48<19:56:14,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134834,8 +30944,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12024/12318 [20:50:20<30:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12024/12318 [20:50:20<30:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 569/12323 [57:50<19:54:58,  6.10s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 569/12323 [57:50<19:54:58,  6.10s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134843,8 +30953,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12025/12318 [20:50:23<30:28,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12025/12318 [20:50:23<30:28,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 570/12323 [57:53<19:53:33,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 570/12323 [57:53<19:53:33,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134852,8 +30962,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12026/12318 [20:50:32<30:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12026/12318 [20:50:32<30:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 571/12323 [57:55<19:52:18,  6.09s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 571/12323 [57:55<19:52:18,  6.09s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134861,8 +30971,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12027/12318 [20:50:38<30:15,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12027/12318 [20:50:38<30:15,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 572/12323 [57:59<19:51:24,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 572/12323 [57:59<19:51:24,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134870,8 +30980,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12028/12318 [20:50:44<30:09,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12028/12318 [20:50:44<30:09,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 573/12323 [58:04<19:50:52,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 573/12323 [58:04<19:50:52,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134879,8 +30989,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12029/12318 [20:50:49<30:03,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12029/12318 [20:50:49<30:03,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 574/12323 [58:12<19:51:31,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 574/12323 [58:12<19:51:31,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134888,8 +30998,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12030/12318 [20:50:54<29:56,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12030/12318 [20:50:54<29:56,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 575/12323 [58:17<19:51:07,  6.08s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 575/12323 [58:17<19:51:07,  6.08s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134897,8 +31007,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12031/12318 [20:50:59<29:50,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12031/12318 [20:50:59<29:50,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 576/12323 [58:47<19:59:07,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 576/12323 [58:47<19:59:07,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134906,8 +31016,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12032/12318 [20:51:27<29:44,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12032/12318 [20:51:27<29:44,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 577/12323 [58:51<19:58:14,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 577/12323 [58:51<19:58:14,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134915,8 +31025,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12033/12318 [20:51:30<29:38,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12033/12318 [20:51:30<29:38,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 578/12323 [58:55<19:57:20,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 578/12323 [58:55<19:57:20,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134924,8 +31034,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12034/12318 [20:51:33<29:32,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12034/12318 [20:51:33<29:32,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 579/12323 [59:00<19:56:47,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 579/12323 [59:00<19:56:47,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134933,8 +31043,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12035/12318 [20:51:41<29:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12035/12318 [20:51:41<29:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 580/12323 [59:07<19:57:06,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 580/12323 [59:07<19:57:06,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134942,7 +31052,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12036/12318 [20:51:50<29:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 581/12323 [59:15<19:57:44,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 581/12323 [59:15<19:57:44,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134950,7 +31061,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12036/12318 [20:51:50<29:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 582/12323 [59:19<19:56:40,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 582/12323 [59:19<19:56:40,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134958,8 +31070,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12037/12318 [20:51:56<29:13,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12037/12318 [20:51:56<29:13,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 583/12323 [59:23<19:56:06,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 583/12323 [59:23<19:56:06,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134967,8 +31079,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12038/12318 [20:52:05<29:07,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12038/12318 [20:52:05<29:07,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 584/12323 [59:32<19:56:42,  6.12s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 584/12323 [59:32<19:56:42,  6.12s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134976,8 +31088,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12039/12318 [20:52:07<29:01,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12039/12318 [20:52:07<29:01,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 585/12323 [59:33<19:54:58,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 585/12323 [59:33<19:54:58,  6.11s/it, v_num=i2o7, train/loss=0."
      ]
     },
     {
@@ -134985,8 +31097,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12040/12318 [20:52:15<28:54,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12040/12318 [20:52:15<28:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 586/12323 [59:41<19:55:34,  6.11s/it, v_num=i2o7, train/loss=0.\r",
+      "Epoch 0:   5%| | 586/12323 [59:41<19:55:34,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -134994,7 +31106,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12041/12318 [20:52:17<28:48,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 587/12323 [59:45<19:54:40,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 587/12323 [59:45<19:54:40,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -135002,7 +31115,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12041/12318 [20:52:17<28:48,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 588/12323 [59:53<19:55:18,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 588/12323 [59:53<19:55:18,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -135010,8 +31124,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12042/12318 [20:52:23<28:42,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12042/12318 [20:52:23<28:42,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 589/12323 [59:56<19:54:14,  6.11s/it, v_num=i2o7, train/loss=7.\r",
+      "Epoch 0:   5%| | 589/12323 [59:56<19:54:14,  6.11s/it, v_num=i2o7, train/loss=7."
      ]
     },
     {
@@ -135019,8 +31133,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12043/12318 [20:52:28<28:36,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12043/12318 [20:52:28<28:36,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 590/12323 [1:00:05<19:54:51,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 590/12323 [1:00:05<19:54:51,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135028,8 +31142,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12044/12318 [20:52:33<28:29,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12044/12318 [20:52:33<28:29,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 591/12323 [1:00:09<19:54:19,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 591/12323 [1:00:09<19:54:19,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135037,8 +31151,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12045/12318 [20:52:38<28:23,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12045/12318 [20:52:38<28:23,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 592/12323 [1:00:13<19:53:26,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 592/12323 [1:00:13<19:53:26,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135046,8 +31160,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12046/12318 [20:52:43<28:17,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12046/12318 [20:52:43<28:17,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 593/12323 [1:00:18<19:52:53,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 593/12323 [1:00:18<19:52:53,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135055,8 +31169,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12047/12318 [20:52:50<28:10,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12047/12318 [20:52:50<28:10,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 594/12323 [1:00:19<19:51:11,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 594/12323 [1:00:19<19:51:11,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135064,8 +31178,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12048/12318 [20:52:53<28:04,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12048/12318 [20:52:53<28:04,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 595/12323 [1:00:21<19:49:40,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 595/12323 [1:00:21<19:49:40,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135073,8 +31187,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12049/12318 [20:52:56<27:58,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12049/12318 [20:52:56<27:58,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 596/12323 [1:00:29<19:50:19,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 596/12323 [1:00:29<19:50:19,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135082,8 +31196,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12050/12318 [20:53:04<27:52,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12050/12318 [20:53:04<27:52,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 597/12323 [1:00:34<19:49:49,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 597/12323 [1:00:34<19:49:49,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135091,8 +31205,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12051/12318 [20:53:12<27:45,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12051/12318 [20:53:12<27:45,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 598/12323 [1:00:35<19:48:09,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 598/12323 [1:00:35<19:48:09,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135100,8 +31214,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12052/12318 [20:53:20<27:39,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12052/12318 [20:53:20<27:39,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 599/12323 [1:00:42<19:48:11,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 599/12323 [1:00:42<19:48:11,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135109,8 +31223,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12053/12318 [20:53:22<27:33,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12053/12318 [20:53:22<27:33,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 600/12323 [1:00:50<19:48:50,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 600/12323 [1:00:50<19:48:50,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135118,8 +31232,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12054/12318 [20:53:28<27:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12054/12318 [20:53:28<27:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 601/12323 [1:00:56<19:48:27,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 601/12323 [1:00:56<19:48:27,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135127,8 +31241,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12055/12318 [20:53:36<27:20,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12055/12318 [20:53:36<27:20,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 602/12323 [1:00:57<19:46:47,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 602/12323 [1:00:57<19:46:47,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135136,8 +31250,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12056/12318 [20:53:43<27:14,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12056/12318 [20:53:43<27:14,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 603/12323 [1:01:01<19:46:05,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 603/12323 [1:01:01<19:46:05,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135145,8 +31259,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12057/12318 [20:53:47<27:08,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12057/12318 [20:53:47<27:08,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 604/12323 [1:01:06<19:45:32,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 604/12323 [1:01:06<19:45:32,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135154,8 +31268,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12058/12318 [20:53:54<27:02,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12058/12318 [20:53:54<27:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 605/12323 [1:01:10<19:45:00,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 605/12323 [1:01:10<19:45:00,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135163,7 +31277,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12059/12318 [20:53:58<26:55,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 606/12323 [1:01:14<19:44:09,  6.06s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 606/12323 [1:01:14<19:44:09,  6.06s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135171,7 +31286,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12059/12318 [20:53:58<26:55,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 607/12323 [1:01:19<19:43:39,  6.06s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 607/12323 [1:01:19<19:43:39,  6.06s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135179,8 +31295,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12060/12318 [20:54:06<26:49,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12060/12318 [20:54:06<26:49,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 608/12323 [1:02:13<19:58:58,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 608/12323 [1:02:13<19:58:58,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135188,8 +31304,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12061/12318 [20:54:13<26:43,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12061/12318 [20:54:13<26:43,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 609/12323 [1:02:21<19:59:17,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 609/12323 [1:02:21<19:59:17,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135197,8 +31313,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12062/12318 [20:54:16<26:37,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12062/12318 [20:54:16<26:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 610/12323 [1:02:28<19:59:38,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 610/12323 [1:02:28<19:59:38,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135206,8 +31322,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12063/12318 [20:54:21<26:30,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12063/12318 [20:54:21<26:30,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 611/12323 [1:02:33<19:59:08,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 611/12323 [1:02:33<19:59:08,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135215,8 +31331,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12064/12318 [20:54:44<26:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12064/12318 [20:54:44<26:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 612/12323 [1:02:35<19:57:48,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 612/12323 [1:02:35<19:57:48,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135224,8 +31340,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12065/12318 [20:54:47<26:18,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12065/12318 [20:54:47<26:18,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 613/12323 [1:02:42<19:57:49,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 613/12323 [1:02:42<19:57:49,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135233,8 +31349,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12066/12318 [20:54:55<26:12,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12066/12318 [20:54:55<26:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 614/12323 [1:02:48<19:57:49,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 614/12323 [1:02:48<19:57:49,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135242,8 +31358,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12067/12318 [20:55:00<26:06,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12067/12318 [20:55:00<26:06,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 615/12323 [1:02:53<19:57:19,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 615/12323 [1:02:53<19:57:19,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135251,8 +31367,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12068/12318 [20:55:02<25:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12068/12318 [20:55:02<25:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 616/12323 [1:03:01<19:57:38,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 616/12323 [1:03:01<19:57:38,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135260,8 +31376,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12069/12318 [20:55:04<25:53,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12069/12318 [20:55:04<25:53,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 617/12323 [1:03:04<19:56:48,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 617/12323 [1:03:04<19:56:48,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135269,8 +31385,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12070/12318 [20:55:11<25:47,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12070/12318 [20:55:11<25:47,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 618/12323 [1:03:12<19:57:07,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 618/12323 [1:03:12<19:57:07,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135278,8 +31394,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12071/12318 [20:55:15<25:41,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12071/12318 [20:55:15<25:41,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 619/12323 [1:03:16<19:56:17,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 619/12323 [1:03:16<19:56:17,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135287,8 +31403,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12072/12318 [20:55:18<25:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12072/12318 [20:55:18<25:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 620/12323 [1:03:18<19:55:08,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 620/12323 [1:03:18<19:55:08,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135296,8 +31412,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12073/12318 [20:55:26<25:28,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12073/12318 [20:55:26<25:28,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 621/12323 [1:03:27<19:55:47,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 621/12323 [1:03:27<19:55:47,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135305,8 +31421,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12074/12318 [20:55:34<25:22,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12074/12318 [20:55:34<25:22,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 622/12323 [1:03:29<19:54:28,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 622/12323 [1:03:29<19:54:28,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135314,8 +31430,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12075/12318 [20:55:39<25:16,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12075/12318 [20:55:39<25:16,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 623/12323 [1:03:38<19:55:08,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 623/12323 [1:03:38<19:55:08,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135323,8 +31439,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12076/12318 [20:55:41<25:09,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12076/12318 [20:55:41<25:09,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 624/12323 [1:03:44<19:55:08,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 624/12323 [1:03:44<19:55:08,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135332,8 +31448,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12077/12318 [20:55:46<25:03,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12077/12318 [20:55:46<25:03,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 625/12323 [1:03:46<19:53:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 625/12323 [1:03:46<19:53:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135341,8 +31457,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12078/12318 [20:55:51<24:57,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12078/12318 [20:55:51<24:57,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 626/12323 [1:03:50<19:53:02,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 626/12323 [1:03:50<19:53:02,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135350,8 +31466,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12079/12318 [20:55:57<24:51,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12079/12318 [20:55:57<24:51,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 627/12323 [1:03:55<19:52:33,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 627/12323 [1:03:55<19:52:33,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135359,8 +31475,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12080/12318 [20:55:59<24:44,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12080/12318 [20:55:59<24:44,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 628/12323 [1:03:59<19:51:45,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 628/12323 [1:03:59<19:51:45,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135368,8 +31484,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12081/12318 [20:56:06<24:38,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12081/12318 [20:56:06<24:38,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 629/12323 [1:04:05<19:51:26,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 629/12323 [1:04:05<19:51:26,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135377,8 +31493,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12082/12318 [20:56:11<24:32,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12082/12318 [20:56:11<24:32,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 630/12323 [1:04:08<19:50:28,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 630/12323 [1:04:08<19:50:28,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135386,8 +31502,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12083/12318 [20:56:12<24:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12083/12318 [20:56:12<24:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 631/12323 [1:04:12<19:49:41,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 631/12323 [1:04:12<19:49:41,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135395,8 +31511,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12084/12318 [20:56:20<24:19,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12084/12318 [20:56:20<24:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 632/12323 [1:04:14<19:48:15,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 632/12323 [1:04:14<19:48:15,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135404,8 +31520,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12085/12318 [20:56:24<24:13,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12085/12318 [20:56:24<24:13,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 633/12323 [1:04:19<19:47:47,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 633/12323 [1:04:19<19:47:47,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135413,8 +31529,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12086/12318 [20:56:29<24:07,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12086/12318 [20:56:29<24:07,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 634/12323 [1:04:20<19:46:22,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 634/12323 [1:04:20<19:46:22,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135422,8 +31538,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12087/12318 [20:56:38<24:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12087/12318 [20:56:38<24:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 635/12323 [1:04:24<19:45:35,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 635/12323 [1:04:24<19:45:35,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135431,8 +31547,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12088/12318 [20:56:44<23:54,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12088/12318 [20:56:44<23:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 636/12323 [1:04:28<19:44:48,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 636/12323 [1:04:28<19:44:48,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135440,8 +31556,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12089/12318 [20:56:47<23:48,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12089/12318 [20:56:47<23:48,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 637/12323 [1:04:31<19:43:42,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 637/12323 [1:04:31<19:43:42,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135449,8 +31565,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12090/12318 [20:56:55<23:42,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12090/12318 [20:56:55<23:42,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 638/12323 [1:04:39<19:44:19,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 638/12323 [1:04:39<19:44:19,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135458,8 +31574,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12091/12318 [20:57:00<23:35,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12091/12318 [20:57:00<23:35,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 639/12323 [1:04:41<19:42:45,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 639/12323 [1:04:41<19:42:45,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135467,8 +31583,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12092/12318 [20:57:08<23:29,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12092/12318 [20:57:08<23:29,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 640/12323 [1:05:34<19:57:02,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 640/12323 [1:05:34<19:57:02,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135476,8 +31592,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12093/12318 [20:57:11<23:23,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12093/12318 [20:57:11<23:23,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 641/12323 [1:05:39<19:56:33,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 641/12323 [1:05:39<19:56:33,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135485,8 +31601,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12094/12318 [20:57:20<23:17,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12094/12318 [20:57:20<23:17,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 642/12323 [1:05:46<19:56:51,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 642/12323 [1:05:46<19:56:51,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135494,8 +31610,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12095/12318 [20:57:28<23:11,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12095/12318 [20:57:28<23:11,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 643/12323 [1:05:54<19:57:09,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 643/12323 [1:05:54<19:57:09,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135503,8 +31619,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12096/12318 [20:58:06<23:05,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12096/12318 [20:58:06<23:05,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 644/12323 [1:06:01<19:57:27,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 644/12323 [1:06:01<19:57:27,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135512,8 +31628,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12097/12318 [20:58:10<22:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12097/12318 [20:58:10<22:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 645/12323 [1:06:06<19:56:58,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 645/12323 [1:06:06<19:56:58,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135521,8 +31637,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12098/12318 [20:58:17<22:52,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12098/12318 [20:58:17<22:52,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 646/12323 [1:06:11<19:56:29,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 646/12323 [1:06:11<19:56:29,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135530,8 +31646,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12099/12318 [20:58:19<22:46,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12099/12318 [20:58:19<22:46,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 647/12323 [1:06:14<19:55:23,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 647/12323 [1:06:14<19:55:23,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135539,8 +31655,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12100/12318 [20:58:25<22:40,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12100/12318 [20:58:25<22:40,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 648/12323 [1:06:18<19:54:44,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 648/12323 [1:06:18<19:54:44,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135548,8 +31664,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12101/12318 [20:58:32<22:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12101/12318 [20:58:32<22:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 649/12323 [1:06:26<19:54:59,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 649/12323 [1:06:26<19:54:59,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135557,8 +31673,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12102/12318 [20:58:40<22:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12102/12318 [20:58:40<22:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 650/12323 [1:06:31<19:54:38,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 650/12323 [1:06:31<19:54:38,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135566,8 +31682,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12103/12318 [20:58:47<22:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12103/12318 [20:58:47<22:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 651/12323 [1:06:38<19:54:53,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 651/12323 [1:06:38<19:54:53,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135575,8 +31691,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12104/12318 [20:58:50<22:15,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12104/12318 [20:58:50<22:15,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 652/12323 [1:06:40<19:53:29,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 652/12323 [1:06:40<19:53:29,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135584,8 +31700,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12105/12318 [20:58:51<22:09,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12105/12318 [20:58:51<22:09,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 653/12323 [1:06:47<19:53:44,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 653/12323 [1:06:47<19:53:44,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135593,8 +31709,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12106/12318 [20:59:00<22:02,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12106/12318 [20:59:00<22:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 654/12323 [1:06:54<19:53:40,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 654/12323 [1:06:54<19:53:40,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135602,8 +31718,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12107/12318 [20:59:04<21:56,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12107/12318 [20:59:04<21:56,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 655/12323 [1:07:01<19:53:55,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 655/12323 [1:07:01<19:53:55,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135611,8 +31727,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12108/12318 [20:59:12<21:50,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12108/12318 [20:59:12<21:50,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 656/12323 [1:07:04<19:52:49,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 656/12323 [1:07:04<19:52:49,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135620,8 +31736,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12109/12318 [20:59:21<21:44,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12109/12318 [20:59:21<21:44,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 657/12323 [1:07:09<19:52:28,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 657/12323 [1:07:09<19:52:28,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135629,8 +31745,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12110/12318 [20:59:28<21:37,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12110/12318 [20:59:28<21:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 658/12323 [1:07:14<19:52:06,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 658/12323 [1:07:14<19:52:06,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135638,8 +31754,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12111/12318 [20:59:34<21:31,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12111/12318 [20:59:34<21:31,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 659/12323 [1:07:15<19:50:34,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 659/12323 [1:07:15<19:50:34,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135647,8 +31763,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12112/12318 [20:59:40<21:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12112/12318 [20:59:40<21:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 660/12323 [1:07:21<19:50:15,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 660/12323 [1:07:21<19:50:15,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135656,8 +31772,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12113/12318 [20:59:43<21:19,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12113/12318 [20:59:43<21:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 661/12323 [1:07:28<19:50:28,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 661/12323 [1:07:28<19:50:28,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135665,7 +31781,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12114/12318 [20:59:50<21:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 662/12323 [1:07:34<19:50:26,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 662/12323 [1:07:34<19:50:26,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135673,7 +31790,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12114/12318 [20:59:50<21:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 663/12323 [1:07:36<19:49:04,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 663/12323 [1:07:36<19:49:04,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135681,8 +31799,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12115/12318 [20:59:57<21:06,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12115/12318 [20:59:57<21:06,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 664/12323 [1:07:40<19:48:17,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 664/12323 [1:07:40<19:48:17,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135690,8 +31808,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12116/12318 [21:00:04<21:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12116/12318 [21:00:04<21:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 665/12323 [1:07:48<19:48:50,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 665/12323 [1:07:48<19:48:50,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135699,8 +31817,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12117/12318 [21:00:11<20:54,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12117/12318 [21:00:11<20:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 666/12323 [1:07:53<19:48:20,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 666/12323 [1:07:53<19:48:20,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135708,8 +31826,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12118/12318 [21:00:16<20:48,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12118/12318 [21:00:16<20:48,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 667/12323 [1:07:54<19:46:49,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 667/12323 [1:07:54<19:46:49,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135717,8 +31835,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12119/12318 [21:00:25<20:41,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12119/12318 [21:00:25<20:41,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 668/12323 [1:08:00<19:46:29,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 668/12323 [1:08:00<19:46:29,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135726,8 +31844,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12120/12318 [21:00:31<20:35,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12120/12318 [21:00:31<20:35,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 669/12323 [1:08:03<19:45:32,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 669/12323 [1:08:03<19:45:32,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135735,8 +31853,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12121/12318 [21:00:38<20:29,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12121/12318 [21:00:38<20:29,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 670/12323 [1:08:05<19:44:11,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 670/12323 [1:08:05<19:44:11,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135744,8 +31862,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12122/12318 [21:00:41<20:23,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12122/12318 [21:00:41<20:23,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 671/12323 [1:08:06<19:42:49,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 671/12323 [1:08:06<19:42:49,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135753,8 +31871,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12123/12318 [21:00:47<20:16,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12123/12318 [21:00:47<20:16,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 672/12323 [1:08:47<19:52:46,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 672/12323 [1:08:47<19:52:46,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135762,8 +31880,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12124/12318 [21:00:51<20:10,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12124/12318 [21:00:51<20:10,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 673/12323 [1:08:50<19:51:41,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 673/12323 [1:08:50<19:51:41,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135771,8 +31889,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12125/12318 [21:00:54<20:04,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12125/12318 [21:00:54<20:04,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 674/12323 [1:08:54<19:50:55,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 674/12323 [1:08:54<19:50:55,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135780,8 +31898,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12126/12318 [21:01:02<19:58,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12126/12318 [21:01:02<19:58,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 675/12323 [1:08:57<19:49:59,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 675/12323 [1:08:57<19:49:59,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135789,8 +31907,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12127/12318 [21:01:07<19:51,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12127/12318 [21:01:07<19:51,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 676/12323 [1:09:03<19:49:41,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 676/12323 [1:09:03<19:49:41,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135798,8 +31916,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12128/12318 [21:01:24<19:45,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12128/12318 [21:01:24<19:45,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   5%| | 677/12323 [1:09:06<19:48:46,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   5%| | 677/12323 [1:09:06<19:48:46,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135807,8 +31925,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12129/12318 [21:01:28<19:39,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12129/12318 [21:01:28<19:39,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 678/12323 [1:09:08<19:47:35,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 678/12323 [1:09:08<19:47:35,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135816,8 +31934,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12130/12318 [21:01:32<19:33,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12130/12318 [21:01:32<19:33,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 679/12323 [1:09:16<19:47:51,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 679/12323 [1:09:16<19:47:51,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135825,8 +31943,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12131/12318 [21:01:38<19:26,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12131/12318 [21:01:38<19:26,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 680/12323 [1:09:20<19:47:14,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 680/12323 [1:09:20<19:47:14,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135834,8 +31952,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12132/12318 [21:01:41<19:20,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12132/12318 [21:01:41<19:20,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 681/12323 [1:09:27<19:47:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 681/12323 [1:09:27<19:47:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135843,8 +31961,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  98%|▉| 12133/12318 [21:01:47<19:14,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  98%|▉| 12133/12318 [21:01:47<19:14,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 682/12323 [1:09:30<19:46:29,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 682/12323 [1:09:30<19:46:29,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135852,8 +31970,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12134/12318 [21:01:52<19:08,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12134/12318 [21:01:52<19:08,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 683/12323 [1:09:38<19:46:45,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 683/12323 [1:09:38<19:46:45,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135861,8 +31979,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12135/12318 [21:01:53<19:01,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12135/12318 [21:01:53<19:01,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 684/12323 [1:09:41<19:45:51,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 684/12323 [1:09:41<19:45:51,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135870,8 +31988,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12136/12318 [21:01:58<18:55,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12136/12318 [21:01:58<18:55,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 685/12323 [1:09:46<19:45:24,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 685/12323 [1:09:46<19:45:24,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135879,8 +31997,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12137/12318 [21:02:01<18:49,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12137/12318 [21:02:01<18:49,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 686/12323 [1:09:53<19:45:41,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 686/12323 [1:09:53<19:45:41,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135888,8 +32006,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12138/12318 [21:02:03<18:42,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12138/12318 [21:02:03<18:42,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 687/12323 [1:10:02<19:46:15,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 687/12323 [1:10:02<19:46:15,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135897,8 +32015,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12139/12318 [21:02:04<18:36,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12139/12318 [21:02:04<18:36,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 688/12323 [1:10:09<19:46:32,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 688/12323 [1:10:09<19:46:32,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135906,8 +32024,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12140/12318 [21:02:05<18:30,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12140/12318 [21:02:05<18:30,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 689/12323 [1:10:11<19:45:13,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 689/12323 [1:10:11<19:45:13,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135915,8 +32033,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12141/12318 [21:02:13<18:24,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12141/12318 [21:02:13<18:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 690/12323 [1:10:15<19:44:38,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 690/12323 [1:10:15<19:44:38,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135924,8 +32042,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12142/12318 [21:02:18<18:17,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12142/12318 [21:02:18<18:17,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 691/12323 [1:10:20<19:44:10,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 691/12323 [1:10:20<19:44:10,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135933,8 +32051,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12143/12318 [21:02:26<18:11,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12143/12318 [21:02:26<18:11,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 692/12323 [1:10:29<19:44:44,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 692/12323 [1:10:29<19:44:44,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135942,8 +32060,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12144/12318 [21:02:33<18:05,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12144/12318 [21:02:33<18:05,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 693/12323 [1:10:37<19:45:17,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 693/12323 [1:10:37<19:45:17,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135951,8 +32069,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12145/12318 [21:02:37<17:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12145/12318 [21:02:37<17:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 694/12323 [1:10:43<19:45:00,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 694/12323 [1:10:43<19:45:00,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135960,8 +32078,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12146/12318 [21:02:40<17:52,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12146/12318 [21:02:40<17:52,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 695/12323 [1:10:49<19:44:58,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 695/12323 [1:10:49<19:44:58,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135969,8 +32087,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12147/12318 [21:02:49<17:46,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12147/12318 [21:02:49<17:46,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 696/12323 [1:10:50<19:43:31,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 696/12323 [1:10:50<19:43:31,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135978,8 +32096,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12148/12318 [21:02:58<17:40,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12148/12318 [21:02:58<17:40,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 697/12323 [1:10:54<19:42:38,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 697/12323 [1:10:54<19:42:38,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135987,8 +32105,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12149/12318 [21:03:03<17:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12149/12318 [21:03:03<17:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 698/12323 [1:10:59<19:42:19,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 698/12323 [1:10:59<19:42:19,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -135996,8 +32114,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12150/12318 [21:03:06<17:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12150/12318 [21:03:06<17:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 699/12323 [1:11:06<19:42:34,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 699/12323 [1:11:06<19:42:34,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136005,8 +32123,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12151/12318 [21:03:14<17:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12151/12318 [21:03:14<17:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 700/12323 [1:11:10<19:41:50,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 700/12323 [1:11:10<19:41:50,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136014,8 +32132,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12152/12318 [21:03:17<17:15,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12152/12318 [21:03:17<17:15,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 701/12323 [1:11:17<19:42:04,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 701/12323 [1:11:17<19:42:04,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136023,8 +32141,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12153/12318 [21:03:24<17:09,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12153/12318 [21:03:24<17:09,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 702/12323 [1:11:22<19:41:37,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 702/12323 [1:11:22<19:41:37,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136032,8 +32150,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12154/12318 [21:03:30<17:02,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12154/12318 [21:03:30<17:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 703/12323 [1:11:30<19:41:52,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 703/12323 [1:11:30<19:41:52,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136041,8 +32159,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12155/12318 [21:03:38<16:56,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12155/12318 [21:03:38<16:56,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 704/12323 [1:11:57<19:47:38,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 704/12323 [1:11:57<19:47:38,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136050,8 +32168,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12156/12318 [21:03:47<16:50,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12156/12318 [21:03:47<16:50,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 705/12323 [1:12:04<19:47:53,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 705/12323 [1:12:04<19:47:53,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136059,8 +32177,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12157/12318 [21:03:55<16:44,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12157/12318 [21:03:55<16:44,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 706/12323 [1:12:09<19:47:26,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 706/12323 [1:12:09<19:47:26,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136068,8 +32186,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12158/12318 [21:04:04<16:38,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12158/12318 [21:04:04<16:38,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 707/12323 [1:12:16<19:47:26,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 707/12323 [1:12:16<19:47:26,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136077,8 +32195,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12159/12318 [21:04:13<16:31,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12159/12318 [21:04:13<16:31,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 708/12323 [1:12:19<19:46:34,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 708/12323 [1:12:19<19:46:34,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136086,8 +32204,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12160/12318 [21:04:40<16:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12160/12318 [21:04:40<16:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 709/12323 [1:12:26<19:46:33,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 709/12323 [1:12:26<19:46:33,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136095,8 +32213,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12161/12318 [21:04:46<16:19,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12161/12318 [21:04:46<16:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 710/12323 [1:12:27<19:45:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 710/12323 [1:12:27<19:45:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136104,8 +32222,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12162/12318 [21:04:51<16:13,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12162/12318 [21:04:51<16:13,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 711/12323 [1:12:35<19:45:32,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 711/12323 [1:12:35<19:45:32,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136113,8 +32231,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12163/12318 [21:04:55<16:07,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12163/12318 [21:04:55<16:07,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 712/12323 [1:12:40<19:45:14,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 712/12323 [1:12:40<19:45:14,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136122,8 +32240,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12164/12318 [21:05:01<16:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12164/12318 [21:05:01<16:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 713/12323 [1:12:45<19:44:40,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 713/12323 [1:12:45<19:44:40,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136131,8 +32249,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12165/12318 [21:05:05<15:54,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12165/12318 [21:05:05<15:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 714/12323 [1:12:47<19:43:24,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 714/12323 [1:12:47<19:43:24,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136140,8 +32258,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12166/12318 [21:05:08<15:48,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12166/12318 [21:05:08<15:48,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 715/12323 [1:12:48<19:41:59,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 715/12323 [1:12:48<19:41:59,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136149,8 +32267,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12167/12318 [21:05:16<15:42,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12167/12318 [21:05:16<15:42,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 716/12323 [1:12:56<19:42:30,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 716/12323 [1:12:56<19:42:30,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136158,8 +32276,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12168/12318 [21:05:18<15:35,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12168/12318 [21:05:18<15:35,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 717/12323 [1:13:02<19:42:12,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 717/12323 [1:13:02<19:42:12,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136167,8 +32285,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12169/12318 [21:05:23<15:29,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12169/12318 [21:05:23<15:29,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 718/12323 [1:13:10<19:42:44,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 718/12323 [1:13:10<19:42:44,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136176,8 +32294,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12170/12318 [21:05:28<15:23,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12170/12318 [21:05:28<15:23,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 719/12323 [1:13:14<19:42:09,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 719/12323 [1:13:14<19:42:09,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136185,8 +32303,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12171/12318 [21:05:34<15:17,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12171/12318 [21:05:34<15:17,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 720/12323 [1:13:16<19:40:54,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 720/12323 [1:13:16<19:40:54,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136194,8 +32312,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12172/12318 [21:05:42<15:10,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12172/12318 [21:05:42<15:10,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 721/12323 [1:13:21<19:40:28,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 721/12323 [1:13:21<19:40:28,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136203,8 +32321,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12173/12318 [21:05:44<15:04,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12173/12318 [21:05:44<15:04,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 722/12323 [1:13:27<19:40:11,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 722/12323 [1:13:27<19:40:11,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136212,8 +32330,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12174/12318 [21:05:49<14:58,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12174/12318 [21:05:49<14:58,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 723/12323 [1:13:29<19:39:12,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 723/12323 [1:13:29<19:39:12,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136221,8 +32339,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12175/12318 [21:05:53<14:52,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12175/12318 [21:05:53<14:52,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 724/12323 [1:13:38<19:39:44,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 724/12323 [1:13:38<19:39:44,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136230,8 +32348,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12176/12318 [21:05:58<14:45,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12176/12318 [21:05:58<14:45,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 725/12323 [1:13:46<19:40:16,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 725/12323 [1:13:46<19:40:16,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136239,8 +32357,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12177/12318 [21:06:01<14:39,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12177/12318 [21:06:01<14:39,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 726/12323 [1:13:48<19:39:02,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 726/12323 [1:13:48<19:39:02,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136248,8 +32366,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12178/12318 [21:06:07<14:33,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12178/12318 [21:06:07<14:33,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 727/12323 [1:13:55<19:39:01,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 727/12323 [1:13:55<19:39:02,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136257,8 +32375,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12179/12318 [21:06:10<14:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12179/12318 [21:06:10<14:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 728/12323 [1:13:59<19:38:36,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 728/12323 [1:13:59<19:38:36,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136266,8 +32384,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12180/12318 [21:06:15<14:20,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12180/12318 [21:06:15<14:20,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 729/12323 [1:14:02<19:37:37,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 729/12323 [1:14:02<19:37:37,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136275,8 +32393,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12181/12318 [21:06:20<14:14,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12181/12318 [21:06:20<14:14,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 730/12323 [1:14:08<19:37:20,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 730/12323 [1:14:08<19:37:20,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136284,8 +32402,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12182/12318 [21:06:29<14:08,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12182/12318 [21:06:29<14:08,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 731/12323 [1:14:16<19:37:51,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 731/12323 [1:14:16<19:37:51,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136293,8 +32411,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12183/12318 [21:06:33<14:02,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12183/12318 [21:06:33<14:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 732/12323 [1:14:20<19:37:09,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 732/12323 [1:14:20<19:37:09,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136302,8 +32420,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12184/12318 [21:06:41<13:55,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12184/12318 [21:06:41<13:55,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 733/12323 [1:14:24<19:36:27,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 733/12323 [1:14:24<19:36:27,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136311,8 +32429,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12185/12318 [21:06:43<13:49,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12185/12318 [21:06:43<13:49,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 734/12323 [1:14:29<19:36:10,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 734/12323 [1:14:29<19:36:10,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136320,8 +32438,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12186/12318 [21:06:51<13:43,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12186/12318 [21:06:51<13:43,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 735/12323 [1:14:38<19:36:41,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 735/12323 [1:14:38<19:36:41,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136329,8 +32447,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12187/12318 [21:06:55<13:37,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12187/12318 [21:06:55<13:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 736/12323 [1:15:06<19:42:32,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 736/12323 [1:15:06<19:42:32,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136338,8 +32456,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12188/12318 [21:07:00<13:30,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12188/12318 [21:07:00<13:30,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 737/12323 [1:15:13<19:42:32,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 737/12323 [1:15:13<19:42:32,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136347,8 +32465,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12189/12318 [21:07:06<13:24,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12189/12318 [21:07:06<13:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 738/12323 [1:15:17<19:41:50,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 738/12323 [1:15:17<19:41:50,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136356,8 +32474,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12190/12318 [21:07:12<13:18,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12190/12318 [21:07:12<13:18,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 739/12323 [1:15:25<19:42:21,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 739/12323 [1:15:25<19:42:21,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136365,8 +32483,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12191/12318 [21:07:14<13:12,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12191/12318 [21:07:14<13:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 740/12323 [1:15:28<19:41:15,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 740/12323 [1:15:28<19:41:15,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136374,8 +32492,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12192/12318 [21:08:01<13:06,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12192/12318 [21:08:01<13:06,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 741/12323 [1:15:34<19:41:14,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 741/12323 [1:15:34<19:41:14,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136383,8 +32501,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12193/12318 [21:08:10<13:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12193/12318 [21:08:10<13:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 742/12323 [1:15:40<19:41:13,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 742/12323 [1:15:40<19:41:13,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136392,8 +32510,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12194/12318 [21:08:17<12:53,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12194/12318 [21:08:17<12:53,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 743/12323 [1:15:49<19:41:43,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 743/12323 [1:15:49<19:41:43,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136401,8 +32519,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12195/12318 [21:08:21<12:47,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12195/12318 [21:08:21<12:47,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 744/12323 [1:15:54<19:41:18,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 744/12323 [1:15:54<19:41:18,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136410,8 +32528,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12196/12318 [21:08:28<12:41,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12196/12318 [21:08:28<12:41,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 745/12323 [1:15:58<19:40:37,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 745/12323 [1:15:58<19:40:37,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136419,8 +32537,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12197/12318 [21:08:33<12:35,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12197/12318 [21:08:33<12:35,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 746/12323 [1:16:03<19:40:19,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 746/12323 [1:16:03<19:40:19,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136428,8 +32546,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12198/12318 [21:08:40<12:28,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12198/12318 [21:08:40<12:28,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 747/12323 [1:16:09<19:40:18,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 747/12323 [1:16:09<19:40:18,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136437,8 +32555,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12199/12318 [21:08:46<12:22,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12199/12318 [21:08:46<12:22,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 748/12323 [1:16:18<19:40:49,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 748/12323 [1:16:18<19:40:49,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136446,8 +32564,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12200/12318 [21:08:52<12:16,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12200/12318 [21:08:52<12:16,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 749/12323 [1:16:22<19:40:07,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 749/12323 [1:16:22<19:40:07,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136455,8 +32573,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12201/12318 [21:09:00<12:10,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12201/12318 [21:09:00<12:10,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 750/12323 [1:16:26<19:39:27,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 750/12323 [1:16:26<19:39:27,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136464,8 +32582,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12202/12318 [21:09:09<12:03,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12202/12318 [21:09:09<12:03,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 751/12323 [1:16:34<19:39:55,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 751/12323 [1:16:34<19:39:55,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136473,8 +32591,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12203/12318 [21:09:18<11:57,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12203/12318 [21:09:18<11:57,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 752/12323 [1:16:39<19:39:27,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 752/12323 [1:16:39<19:39:27,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136482,8 +32600,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12204/12318 [21:09:26<11:51,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12204/12318 [21:09:26<11:51,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 753/12323 [1:16:44<19:39:08,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 753/12323 [1:16:44<19:39:08,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136491,8 +32609,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12205/12318 [21:09:30<11:45,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12205/12318 [21:09:30<11:45,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 754/12323 [1:16:45<19:37:47,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 754/12323 [1:16:45<19:37:47,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136500,8 +32618,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12206/12318 [21:09:34<11:38,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12206/12318 [21:09:34<11:38,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 755/12323 [1:16:53<19:38:02,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 755/12323 [1:16:53<19:38:02,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136509,8 +32627,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12207/12318 [21:09:42<11:32,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12207/12318 [21:09:42<11:32,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 756/12323 [1:17:00<19:38:13,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 756/12323 [1:17:00<19:38:13,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136518,8 +32636,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12208/12318 [21:09:49<11:26,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12208/12318 [21:09:49<11:26,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 757/12323 [1:17:08<19:38:43,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 757/12323 [1:17:08<19:38:43,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136527,7 +32645,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12209/12318 [21:09:55<11:20,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 758/12323 [1:17:10<19:37:23,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 758/12323 [1:17:10<19:37:23,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136535,7 +32654,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12209/12318 [21:09:55<11:20,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 759/12323 [1:17:14<19:36:57,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 759/12323 [1:17:14<19:36:57,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136543,8 +32663,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12210/12318 [21:09:57<11:13,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12210/12318 [21:09:57<11:13,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 760/12323 [1:17:21<19:36:55,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 760/12323 [1:17:21<19:36:55,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136552,8 +32672,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12211/12318 [21:10:02<11:07,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12211/12318 [21:10:02<11:07,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 761/12323 [1:17:28<19:37:08,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 761/12323 [1:17:28<19:37:08,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136561,8 +32681,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12212/12318 [21:10:08<11:01,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12212/12318 [21:10:08<11:01,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 762/12323 [1:17:37<19:37:35,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 762/12323 [1:17:37<19:37:35,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136570,8 +32690,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12213/12318 [21:10:12<10:55,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12213/12318 [21:10:12<10:55,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 763/12323 [1:17:40<19:36:46,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 763/12323 [1:17:40<19:36:46,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136579,8 +32699,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12214/12318 [21:10:21<10:49,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12214/12318 [21:10:21<10:49,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 764/12323 [1:17:44<19:36:04,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 764/12323 [1:17:44<19:36:04,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136588,8 +32708,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12215/12318 [21:10:27<10:42,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12215/12318 [21:10:27<10:42,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 765/12323 [1:17:45<19:34:52,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 765/12323 [1:17:45<19:34:52,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136597,8 +32717,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12216/12318 [21:10:31<10:36,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12216/12318 [21:10:31<10:36,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 766/12323 [1:17:51<19:34:33,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 766/12323 [1:17:51<19:34:33,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136606,8 +32726,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12217/12318 [21:10:38<10:30,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12217/12318 [21:10:38<10:30,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 767/12323 [1:17:53<19:33:36,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 767/12323 [1:17:53<19:33:36,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136615,8 +32735,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12218/12318 [21:10:41<10:24,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12218/12318 [21:10:41<10:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 768/12323 [1:18:16<19:37:47,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 768/12323 [1:18:16<19:37:47,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136624,8 +32744,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12219/12318 [21:10:47<10:17,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12219/12318 [21:10:47<10:17,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 769/12323 [1:18:25<19:38:17,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 769/12323 [1:18:25<19:38:17,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136633,8 +32753,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12220/12318 [21:10:53<10:11,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12220/12318 [21:10:53<10:11,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 770/12323 [1:18:29<19:37:37,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 770/12323 [1:18:29<19:37:37,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136642,8 +32762,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12221/12318 [21:10:58<10:05,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12221/12318 [21:10:58<10:05,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 771/12323 [1:18:33<19:36:58,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 771/12323 [1:18:33<19:36:58,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136651,8 +32771,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12222/12318 [21:11:02<09:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12222/12318 [21:11:02<09:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 772/12323 [1:18:36<19:36:03,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 772/12323 [1:18:36<19:36:03,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136660,8 +32780,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12223/12318 [21:11:05<09:52,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12223/12318 [21:11:05<09:52,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 773/12323 [1:18:40<19:35:38,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 773/12323 [1:18:40<19:35:38,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136669,8 +32789,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12224/12318 [21:11:21<09:46,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12224/12318 [21:11:21<09:46,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 774/12323 [1:18:44<19:34:58,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 774/12323 [1:18:44<19:34:58,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136678,8 +32798,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12225/12318 [21:11:28<09:40,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12225/12318 [21:11:28<09:40,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 775/12323 [1:18:53<19:35:26,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 775/12323 [1:18:53<19:35:26,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136687,8 +32807,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12226/12318 [21:11:29<09:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12226/12318 [21:11:29<09:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 776/12323 [1:18:54<19:34:16,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 776/12323 [1:18:54<19:34:16,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136696,8 +32816,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12227/12318 [21:11:35<09:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12227/12318 [21:11:35<09:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 777/12323 [1:18:59<19:33:50,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 777/12323 [1:18:59<19:33:50,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136705,8 +32825,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12228/12318 [21:11:40<09:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12228/12318 [21:11:40<09:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 778/12323 [1:19:03<19:33:03,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 778/12323 [1:19:03<19:33:03,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136714,8 +32834,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12229/12318 [21:11:48<09:15,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12229/12318 [21:11:48<09:15,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 779/12323 [1:19:09<19:33:00,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 779/12323 [1:19:09<19:33:00,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136723,8 +32843,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12230/12318 [21:11:52<09:09,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12230/12318 [21:11:52<09:09,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 780/12323 [1:19:11<19:31:50,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 780/12323 [1:19:11<19:31:50,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136732,7 +32852,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12231/12318 [21:11:59<09:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 781/12323 [1:19:19<19:32:17,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 781/12323 [1:19:19<19:32:17,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136740,7 +32861,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12231/12318 [21:11:59<09:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 782/12323 [1:19:24<19:31:58,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136748,8 +32869,7 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12232/12318 [21:12:06<08:56,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12232/12318 [21:12:06<08:56,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 782/12323 [1:19:24<19:31:58,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136757,8 +32877,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12233/12318 [21:12:10<08:50,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12233/12318 [21:12:10<08:50,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 783/12323 [1:19:29<19:31:40,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 783/12323 [1:19:29<19:31:40,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136766,8 +32886,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12234/12318 [21:12:18<08:44,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12234/12318 [21:12:18<08:44,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 784/12323 [1:19:38<19:32:05,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 784/12323 [1:19:38<19:32:05,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136775,8 +32895,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12235/12318 [21:12:26<08:37,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12235/12318 [21:12:26<08:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 785/12323 [1:19:44<19:32:01,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 785/12323 [1:19:44<19:32:01,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136784,8 +32904,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12236/12318 [21:12:28<08:31,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12236/12318 [21:12:28<08:31,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 786/12323 [1:19:51<19:32:13,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 786/12323 [1:19:51<19:32:13,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136793,8 +32913,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12237/12318 [21:12:34<08:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12237/12318 [21:12:34<08:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 787/12323 [1:19:56<19:31:49,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 787/12323 [1:19:56<19:31:49,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136802,8 +32922,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12238/12318 [21:12:35<08:19,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12238/12318 [21:12:35<08:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 788/12323 [1:19:58<19:30:39,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 788/12323 [1:19:58<19:30:39,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136811,8 +32931,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12239/12318 [21:12:38<08:12,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12239/12318 [21:12:38<08:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 789/12323 [1:20:02<19:30:07,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 789/12323 [1:20:02<19:30:07,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136820,8 +32940,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12240/12318 [21:12:42<08:06,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12240/12318 [21:12:42<08:06,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 790/12323 [1:20:07<19:29:36,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 790/12323 [1:20:07<19:29:36,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136829,8 +32949,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12241/12318 [21:12:48<08:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12241/12318 [21:12:48<08:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 791/12323 [1:20:13<19:29:35,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 791/12323 [1:20:13<19:29:35,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136838,8 +32958,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12242/12318 [21:12:54<07:54,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12242/12318 [21:12:54<07:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 792/12323 [1:20:21<19:30:03,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 792/12323 [1:20:21<19:30:03,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136847,8 +32967,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12243/12318 [21:12:56<07:47,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12243/12318 [21:12:56<07:47,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 793/12323 [1:20:24<19:29:09,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 793/12323 [1:20:24<19:29:09,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136856,8 +32976,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12244/12318 [21:12:59<07:41,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12244/12318 [21:12:59<07:41,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 794/12323 [1:20:33<19:29:37,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 794/12323 [1:20:33<19:29:37,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136865,8 +32985,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12245/12318 [21:13:08<07:35,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12245/12318 [21:13:08<07:35,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 795/12323 [1:20:38<19:29:19,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 795/12323 [1:20:38<19:29:19,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136874,8 +32994,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12246/12318 [21:13:15<07:29,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12246/12318 [21:13:15<07:29,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 796/12323 [1:20:41<19:28:32,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 796/12323 [1:20:41<19:28:32,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136883,8 +33003,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12247/12318 [21:13:18<07:22,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12247/12318 [21:13:18<07:22,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 797/12323 [1:20:44<19:27:44,  6.08s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 797/12323 [1:20:44<19:27:44,  6.08s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136892,8 +33012,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12248/12318 [21:13:19<07:16,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12248/12318 [21:13:19<07:16,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 798/12323 [1:20:47<19:26:50,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 798/12323 [1:20:47<19:26:50,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136901,8 +33021,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12249/12318 [21:13:28<07:10,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12249/12318 [21:13:28<07:10,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 799/12323 [1:20:51<19:26:10,  6.07s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 799/12323 [1:20:51<19:26:10,  6.07s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136910,26 +33030,36 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12250/12318 [21:13:33<07:04,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12250/12318 [21:13:33<07:04,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   6%| | 800/12323 [1:21:41<19:36:47,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   6%| | 800/12323 [1:21:41<19:36:47,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  99%|▉| 12251/12318 [21:13:34<06:57,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12251/12318 [21:13:34<06:57,  6.24s/it, v_num=e4xv, train/loss="
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "Epoch 0:  99%|▉| 12252/12318 [21:13:40<06:51,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12252/12318 [21:13:40<06:51,  6.24s/it, v_num=e4xv, train/loss="
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n",
+      "/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\r\n",
+      "  warnings.warn(\r\n"
      ]
     },
     {
@@ -136937,8 +33067,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12253/12318 [21:13:43<06:45,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12253/12318 [21:13:43<06:45,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 801/12323 [1:21:59<19:39:25,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 801/12323 [1:21:59<19:39:25,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136946,8 +33076,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12254/12318 [21:13:46<06:39,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12254/12318 [21:13:46<06:39,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 802/12323 [1:22:07<19:39:39,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 802/12323 [1:22:07<19:39:39,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136955,8 +33085,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12255/12318 [21:13:48<06:32,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12255/12318 [21:13:48<06:32,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 803/12323 [1:22:15<19:40:06,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 803/12323 [1:22:15<19:40:06,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136964,8 +33094,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0:  99%|▉| 12256/12318 [21:14:37<06:26,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0:  99%|▉| 12256/12318 [21:14:37<06:26,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 804/12323 [1:22:24<19:40:34,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 804/12323 [1:22:24<19:40:34,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136973,8 +33103,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12257/12318 [21:14:38<06:20,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12257/12318 [21:14:38<06:20,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 805/12323 [1:22:28<19:40:02,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 805/12323 [1:22:28<19:40:02,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136982,7 +33112,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12258/12318 [21:14:41<06:14,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 806/12323 [1:22:36<19:40:28,  6.15s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 806/12323 [1:22:36<19:40:28,  6.15s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136990,7 +33121,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12258/12318 [21:14:41<06:14,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 807/12323 [1:22:38<19:39:19,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 807/12323 [1:22:38<19:39:19,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -136998,8 +33130,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12259/12318 [21:14:44<06:08,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12259/12318 [21:14:44<06:08,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 808/12323 [1:22:43<19:39:01,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 808/12323 [1:22:43<19:39:01,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137007,8 +33139,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12260/12318 [21:14:49<06:01,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12260/12318 [21:14:49<06:01,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 809/12323 [1:22:48<19:38:30,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 809/12323 [1:22:48<19:38:30,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137016,8 +33148,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12261/12318 [21:14:56<05:55,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12261/12318 [21:14:56<05:55,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 810/12323 [1:22:49<19:37:15,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 810/12323 [1:22:49<19:37:15,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137025,8 +33157,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12262/12318 [21:15:02<05:49,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12262/12318 [21:15:02<05:49,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 811/12323 [1:22:55<19:37:12,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 811/12323 [1:22:55<19:37:12,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137034,8 +33166,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12263/12318 [21:15:06<05:43,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12263/12318 [21:15:06<05:43,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 812/12323 [1:23:02<19:37:09,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 812/12323 [1:23:02<19:37:09,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137043,8 +33175,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12264/12318 [21:15:10<05:36,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12264/12318 [21:15:10<05:36,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 813/12323 [1:23:09<19:37:19,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 813/12323 [1:23:09<19:37:19,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137052,8 +33184,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12265/12318 [21:15:11<05:30,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12265/12318 [21:15:11<05:30,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 814/12323 [1:23:14<19:37:03,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 814/12323 [1:23:14<19:37:03,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137061,8 +33193,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12266/12318 [21:15:17<05:24,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12266/12318 [21:15:17<05:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 815/12323 [1:23:19<19:36:38,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 815/12323 [1:23:19<19:36:38,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137070,8 +33202,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12267/12318 [21:15:21<05:18,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12267/12318 [21:15:21<05:18,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 816/12323 [1:23:22<19:35:39,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 816/12323 [1:23:22<19:35:39,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137079,8 +33211,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12268/12318 [21:15:27<05:11,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12268/12318 [21:15:27<05:11,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 817/12323 [1:23:28<19:35:37,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 817/12323 [1:23:28<19:35:37,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137088,8 +33220,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12269/12318 [21:15:30<05:05,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12269/12318 [21:15:30<05:05,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 818/12323 [1:23:31<19:34:52,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 818/12323 [1:23:31<19:34:52,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137097,8 +33229,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12270/12318 [21:15:32<04:59,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12270/12318 [21:15:32<04:59,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 819/12323 [1:23:36<19:34:22,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 819/12323 [1:23:36<19:34:22,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137106,8 +33238,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12271/12318 [21:15:38<04:53,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12271/12318 [21:15:38<04:53,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 820/12323 [1:23:41<19:33:58,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 820/12323 [1:23:41<19:33:58,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137115,8 +33247,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12272/12318 [21:15:45<04:46,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12272/12318 [21:15:45<04:46,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 821/12323 [1:23:42<19:32:44,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 821/12323 [1:23:42<19:32:44,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137124,8 +33256,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12273/12318 [21:15:49<04:40,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12273/12318 [21:15:49<04:40,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 822/12323 [1:23:51<19:33:11,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 822/12323 [1:23:51<19:33:11,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137133,8 +33265,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12274/12318 [21:15:56<04:34,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12274/12318 [21:15:56<04:34,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 823/12323 [1:23:55<19:32:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 823/12323 [1:23:55<19:32:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137142,8 +33274,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12275/12318 [21:16:01<04:28,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12275/12318 [21:16:01<04:28,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 824/12323 [1:24:01<19:32:32,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 824/12323 [1:24:01<19:32:32,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137151,8 +33283,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12276/12318 [21:16:09<04:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12276/12318 [21:16:09<04:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 825/12323 [1:24:06<19:32:09,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 825/12323 [1:24:06<19:32:09,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137160,8 +33292,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12277/12318 [21:16:18<04:15,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12277/12318 [21:16:18<04:15,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 826/12323 [1:24:10<19:31:31,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 826/12323 [1:24:10<19:31:31,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137169,8 +33301,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12278/12318 [21:16:25<04:09,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12278/12318 [21:16:25<04:09,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 827/12323 [1:24:14<19:31:01,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 827/12323 [1:24:14<19:31:01,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137178,8 +33310,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12279/12318 [21:16:27<04:03,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12279/12318 [21:16:27<04:03,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 828/12323 [1:24:19<19:30:45,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 828/12323 [1:24:19<19:30:45,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137187,8 +33319,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12280/12318 [21:16:35<03:57,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12280/12318 [21:16:35<03:57,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 829/12323 [1:24:24<19:30:16,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 829/12323 [1:24:24<19:30:16,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137196,8 +33328,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12281/12318 [21:16:39<03:50,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12281/12318 [21:16:39<03:50,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 830/12323 [1:24:32<19:30:43,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 830/12323 [1:24:32<19:30:43,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137205,8 +33337,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12282/12318 [21:16:47<03:44,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12282/12318 [21:16:47<03:44,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 831/12323 [1:24:37<19:30:21,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 831/12323 [1:24:37<19:30:21,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137214,8 +33346,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12283/12318 [21:16:52<03:38,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12283/12318 [21:16:52<03:38,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 832/12323 [1:25:07<19:35:46,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 832/12323 [1:25:07<19:35:46,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137223,8 +33355,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12284/12318 [21:17:01<03:32,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12284/12318 [21:17:01<03:32,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 833/12323 [1:25:12<19:35:23,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 833/12323 [1:25:12<19:35:23,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137232,8 +33364,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12285/12318 [21:17:08<03:25,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12285/12318 [21:17:08<03:25,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 834/12323 [1:25:16<19:34:38,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 834/12323 [1:25:16<19:34:38,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137241,8 +33373,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12286/12318 [21:17:10<03:19,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12286/12318 [21:17:10<03:19,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 835/12323 [1:25:21<19:34:20,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 835/12323 [1:25:21<19:34:20,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137250,8 +33382,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12287/12318 [21:17:17<03:13,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12287/12318 [21:17:17<03:13,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 836/12323 [1:25:28<19:34:31,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 836/12323 [1:25:28<19:34:31,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137259,8 +33391,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12288/12318 [21:17:42<03:07,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12288/12318 [21:17:42<03:07,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 837/12323 [1:25:32<19:33:53,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 837/12323 [1:25:32<19:33:53,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137268,8 +33400,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12289/12318 [21:17:50<03:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12289/12318 [21:17:50<03:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 838/12323 [1:25:38<19:33:50,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 838/12323 [1:25:38<19:33:50,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137277,8 +33409,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12290/12318 [21:17:57<02:54,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12290/12318 [21:17:57<02:54,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 839/12323 [1:25:46<19:34:00,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 839/12323 [1:25:46<19:34:00,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137286,8 +33418,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12291/12318 [21:18:01<02:48,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12291/12318 [21:18:01<02:48,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 840/12323 [1:25:53<19:34:10,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 840/12323 [1:25:53<19:34:10,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137295,8 +33427,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12292/12318 [21:18:04<02:42,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12292/12318 [21:18:04<02:42,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 841/12323 [1:25:56<19:33:26,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 841/12323 [1:25:56<19:33:26,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137304,8 +33436,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12293/12318 [21:18:09<02:35,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12293/12318 [21:18:09<02:35,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 842/12323 [1:25:58<19:32:13,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 842/12323 [1:25:58<19:32:13,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137313,8 +33445,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12294/12318 [21:18:16<02:29,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12294/12318 [21:18:16<02:29,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 843/12323 [1:26:05<19:32:24,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 843/12323 [1:26:05<19:32:24,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137322,8 +33454,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12295/12318 [21:18:17<02:23,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12295/12318 [21:18:17<02:23,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 844/12323 [1:26:12<19:32:22,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 844/12323 [1:26:12<19:32:22,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137331,8 +33463,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12296/12318 [21:18:22<02:17,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12296/12318 [21:18:22<02:17,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 845/12323 [1:26:19<19:32:34,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 845/12323 [1:26:19<19:32:34,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137340,8 +33472,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12297/12318 [21:18:25<02:10,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12297/12318 [21:18:25<02:10,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 846/12323 [1:26:25<19:32:30,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 846/12323 [1:26:25<19:32:30,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137349,8 +33481,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12298/12318 [21:18:28<02:04,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12298/12318 [21:18:28<02:04,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 847/12323 [1:26:30<19:32:07,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 847/12323 [1:26:30<19:32:07,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137358,8 +33490,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12299/12318 [21:18:37<01:58,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12299/12318 [21:18:37<01:58,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 848/12323 [1:26:37<19:32:17,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 848/12323 [1:26:37<19:32:17,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137367,8 +33499,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12300/12318 [21:18:42<01:52,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12300/12318 [21:18:42<01:52,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 849/12323 [1:26:42<19:31:46,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 849/12323 [1:26:42<19:31:46,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137376,8 +33508,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12301/12318 [21:18:46<01:46,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12301/12318 [21:18:46<01:46,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 850/12323 [1:26:46<19:31:16,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 850/12323 [1:26:46<19:31:16,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137385,7 +33517,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12302/12318 [21:18:50<01:39,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 851/12323 [1:26:49<19:30:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 851/12323 [1:26:49<19:30:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137393,7 +33526,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12302/12318 [21:18:50<01:39,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 852/12323 [1:26:53<19:29:53,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 852/12323 [1:26:53<19:29:53,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137401,8 +33535,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12303/12318 [21:18:56<01:33,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12303/12318 [21:18:56<01:33,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 853/12323 [1:26:59<19:29:38,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 853/12323 [1:26:59<19:29:38,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137410,8 +33544,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12304/12318 [21:19:02<01:27,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12304/12318 [21:19:02<01:27,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 854/12323 [1:27:04<19:29:22,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 854/12323 [1:27:04<19:29:22,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137419,8 +33553,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12305/12318 [21:19:07<01:21,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12305/12318 [21:19:07<01:21,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 855/12323 [1:27:06<19:28:25,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 855/12323 [1:27:06<19:28:25,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137428,8 +33562,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12306/12318 [21:19:12<01:14,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12306/12318 [21:19:12<01:14,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 856/12323 [1:27:15<19:28:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 856/12323 [1:27:15<19:28:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137437,8 +33571,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12307/12318 [21:19:18<01:08,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12307/12318 [21:19:18<01:08,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 857/12323 [1:27:23<19:29:12,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 857/12323 [1:27:23<19:29:12,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137446,8 +33580,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12308/12318 [21:19:26<01:02,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12308/12318 [21:19:26<01:02,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 858/12323 [1:27:31<19:29:29,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 858/12323 [1:27:31<19:29:29,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137455,8 +33589,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12309/12318 [21:19:32<00:56,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12309/12318 [21:19:32<00:56,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 859/12323 [1:27:34<19:28:38,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 859/12323 [1:27:34<19:28:38,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137464,8 +33598,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12310/12318 [21:19:39<00:49,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12310/12318 [21:19:39<00:49,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 860/12323 [1:27:41<19:28:49,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 860/12323 [1:27:41<19:28:49,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137473,8 +33607,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12311/12318 [21:19:48<00:43,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12311/12318 [21:19:48<00:43,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 861/12323 [1:27:49<19:29:13,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 861/12323 [1:27:49<19:29:13,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137482,7 +33616,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12312/12318 [21:19:49<00:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 862/12323 [1:27:55<19:28:57,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 862/12323 [1:27:55<19:28:57,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137490,7 +33625,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12312/12318 [21:19:49<00:37,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 863/12323 [1:28:03<19:29:20,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 863/12323 [1:28:03<19:29:20,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137498,8 +33634,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12313/12318 [21:19:55<00:31,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12313/12318 [21:19:55<00:31,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 864/12323 [1:28:22<19:32:08,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 864/12323 [1:28:22<19:32:08,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137507,8 +33643,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12314/12318 [21:20:00<00:24,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12314/12318 [21:20:00<00:24,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 865/12323 [1:28:28<19:31:51,  6.14s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 865/12323 [1:28:28<19:31:51,  6.14s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137516,8 +33652,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12315/12318 [21:20:09<00:18,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12315/12318 [21:20:09<00:18,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 866/12323 [1:28:30<19:31:01,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 866/12323 [1:28:30<19:31:01,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137525,8 +33661,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12316/12318 [21:20:18<00:12,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12316/12318 [21:20:18<00:12,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 867/12323 [1:28:35<19:30:37,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 867/12323 [1:28:35<19:30:37,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137534,8 +33670,8 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|▉| 12317/12318 [21:20:24<00:06,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|▉| 12317/12318 [21:20:24<00:06,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 868/12323 [1:28:38<19:29:47,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 868/12323 [1:28:38<19:29:47,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137543,145 +33679,143 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|█| 12318/12318 [21:20:30<00:00,  6.24s/it, v_num=e4xv, train/loss=\r",
-      "Epoch 0: 100%|█| 12318/12318 [21:20:30<00:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 869/12323 [1:28:40<19:28:44,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 869/12323 [1:28:40<19:28:44,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation: 0it [00:00, ?it/s]\u001b[A"
+      "Epoch 0:   7%| | 870/12323 [1:28:48<19:29:09,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 870/12323 [1:28:48<19:29:09,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
-      "\r",
-      "Validation:   0%|                                        | 0/13 [00:00<?, ?it/s]\u001b[A\r\n",
       "\r",
-      "Validation DataLoader 0:   0%|                           | 0/13 [00:00<?, ?it/s]\u001b[A"
+      "Epoch 0:   7%| | 871/12323 [1:28:56<19:29:30,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 871/12323 [1:28:56<19:29:30,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:   8%|█▍                 | 1/13 [00:01<00:22,  1.87s/it]\u001b[A"
+      "Epoch 0:   7%| | 872/12323 [1:28:58<19:28:26,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 872/12323 [1:28:58<19:28:26,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  15%|██▉                | 2/13 [00:03<00:20,  1.89s/it]\u001b[A"
+      "Epoch 0:   7%| | 873/12323 [1:29:01<19:27:43,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 873/12323 [1:29:01<19:27:43,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  23%|████▍              | 3/13 [00:05<00:17,  1.74s/it]\u001b[A"
+      "Epoch 0:   7%| | 874/12323 [1:29:09<19:27:51,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 874/12323 [1:29:09<19:27:51,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  31%|█████▊             | 4/13 [00:07<00:16,  1.78s/it]\u001b[A"
+      "Epoch 0:   7%| | 875/12323 [1:29:15<19:27:47,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 875/12323 [1:29:15<19:27:47,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  38%|███████▎           | 5/13 [00:07<00:12,  1.50s/it]\u001b[A"
+      "Epoch 0:   7%| | 876/12323 [1:29:23<19:28:08,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 876/12323 [1:29:23<19:28:08,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  46%|████████▊          | 6/13 [00:10<00:11,  1.67s/it]\u001b[A"
+      "Epoch 0:   7%| | 877/12323 [1:29:24<19:26:58,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 877/12323 [1:29:24<19:26:58,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  54%|██████████▏        | 7/13 [00:11<00:09,  1.63s/it]\u001b[A"
+      "Epoch 0:   7%| | 878/12323 [1:29:27<19:26:01,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 878/12323 [1:29:27<19:26:01,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  62%|███████████▋       | 8/13 [00:13<00:08,  1.72s/it]\u001b[A"
+      "Epoch 0:   7%| | 879/12323 [1:29:35<19:26:24,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 879/12323 [1:29:35<19:26:24,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  69%|█████████████▏     | 9/13 [00:14<00:06,  1.60s/it]\u001b[A"
+      "Epoch 0:   7%| | 880/12323 [1:29:40<19:26:08,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 880/12323 [1:29:40<19:26:08,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  77%|█████████████▊    | 10/13 [00:15<00:04,  1.54s/it]\u001b[A"
+      "Epoch 0:   7%| | 881/12323 [1:29:43<19:25:18,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 881/12323 [1:29:43<19:25:18,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  85%|███████████████▏  | 11/13 [00:15<00:02,  1.45s/it]\u001b[A"
+      "Epoch 0:   7%| | 882/12323 [1:29:47<19:24:41,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 882/12323 [1:29:47<19:24:41,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0:  92%|████████████████▌ | 12/13 [00:17<00:01,  1.48s/it]\u001b[A"
+      "Epoch 0:   7%| | 883/12323 [1:29:52<19:24:17,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 883/12323 [1:29:52<19:24:17,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r\n",
       "\r",
-      "Validation DataLoader 0: 100%|██████████████████| 13/13 [00:19<00:00,  1.54s/it]\u001b[A"
+      "Epoch 0:   7%| | 884/12323 [1:29:56<19:23:54,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 884/12323 [1:29:56<19:23:54,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
@@ -137689,1165 +33823,1391 @@
      "output_type": "stream",
      "text": [
       "\r",
-      "Epoch 0: 100%|█| 12318/12318 [21:21:06<00:00,  6.24s/it, v_num=e4xv, train/loss=\r\n",
-      "\r",
-      "                                                                                \u001b[A\r",
-      "Epoch 0: 100%|█| 12318/12318 [21:21:06<00:00,  6.24s/it, v_num=e4xv, train/loss="
+      "Epoch 0:   7%| | 885/12323 [1:30:03<19:23:50,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 885/12323 [1:30:03<19:23:50,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "`Trainer.fit` stopped: `max_epochs=1` reached.\r\n",
       "\r",
-      "Epoch 0: 100%|█| 12318/12318 [21:21:19<00:00,  6.24s/it, v_num=e4xv, train/loss=\r\n"
+      "Epoch 0:   7%| | 886/12323 [1:30:04<19:22:48,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 886/12323 [1:30:04<19:22:48,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Waiting for W&B process to finish... \u001b[32m(success).\u001b[0m\r\n"
+      "\r",
+      "Epoch 0:   7%| | 887/12323 [1:30:09<19:22:25,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 887/12323 [1:30:09<19:22:25,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\u001b[34m\u001b[1mwandb\u001b[0m: \r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:              batchidx ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:                 epoch ▁▁▁▁▁▁▁▁\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:           global_rank ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:          real_ctx_len ▇██▃▂▃▃▆▆▄▄▄█▁▃▇▆▃▂▅▆▃▅▇▆█▅▄▇▅▅▆█▃▄▆▅▂▁▇\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:               substep ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:            train/loss █▇▇▁▁▆▁▅▄▁▅▁▁▇▅▅▁▁▁▁▁▁▁▆▁▁▁▄▁▃▆▆▅▃▂▁▄▆▁▄\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:   trainer/global_step ▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: trainer/learning_rate ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:       validation/loss ▁\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: \r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:              batchidx 12\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:                 epoch 0\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:           global_rank 0\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:          real_ctx_len 5675\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:               substep 96\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:            train/loss 2.48438\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:   trainer/global_step 384\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: trainer/learning_rate 0.0001\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m:       validation/loss 1.48101\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: \r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mv5-L96-D1024-E0.1 - Mem-Tune ctx-8k (train-ctx=4k, deepspeed_stage_1)\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/runs/42tne4xv\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: ️⚡ View job at \u001b[34m\u001b[4mhttps://wandb.ai/rwkv-x-dev/RWKV-X-Experiments/jobs/QXJ0aWZhY3RDb2xsZWN0aW9uOjkzMjg5ODA3/version_details/v19\u001b[0m\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)\r\n",
-      "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20230902_084200-42tne4xv/logs\u001b[0m\r\n"
+      "\r",
+      "Epoch 0:   7%| | 888/12323 [1:30:14<19:22:10,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 888/12323 [1:30:14<19:22:10,  6.10s/it, v_num=i2o7, train/loss="
      ]
-    }
-   ],
-   "source": [
-    "# Start the finetune model training\n",
-    "!cd \"{TRAINER_DIR}\" && \\\n",
-    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
-    "    export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
-    "    python lightning_trainer.py fit \\\n",
-    "        -c \"{NOTEBOOK_DIR}/config-mem-template.yaml\" \\\n",
-    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-8k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
-    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
-    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
-    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/\" \\\n",
-    "        --model.lr_init=1e-4 \\\n",
-    "        --model.lr_final=1e-4 \\\n",
-    "        --data.max_token_size=8192 \\\n",
-    "        --model.ctx_len=4096 \\\n",
-    "        --model.bptt_learning_range=2 \\\n",
-    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-4k.pth\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "ae8d117d",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-03T06:13:06.960326Z",
-     "iopub.status.busy": "2023-09-03T06:13:06.959873Z",
-     "iopub.status.idle": "2023-09-03T06:13:24.992905Z",
-     "shell.execute_reply": "2023-09-03T06:13:24.991928Z"
-    },
-    "papermill": {
-     "duration": 18.882409,
-     "end_time": "2023-09-03T06:13:24.994661",
-     "exception": false,
-     "start_time": "2023-09-03T06:13:06.112252",
-     "status": "completed"
     },
-    "tags": []
-   },
-   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
+      "\r",
+      "Epoch 0:   7%| | 889/12323 [1:30:23<19:22:34,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 889/12323 [1:30:23<19:22:34,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing zero checkpoint '../checkpoint/v5-L96-D1024-E0_1-mem-ctx-8k/last.ckpt/checkpoint'\r\n"
+      "\r",
+      "Epoch 0:   7%| | 890/12323 [1:30:31<19:22:55,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 890/12323 [1:30:31<19:22:55,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Detected checkpoint of type zero stage ZeroStageEnum.optimizer_states, world_size: 8\r\n"
+      "\r",
+      "Epoch 0:   7%| | 891/12323 [1:30:37<19:22:52,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 891/12323 [1:30:37<19:22:52,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Parsing checkpoint created by deepspeed==0.9.3\r\n"
+      "\r",
+      "Epoch 0:   7%| | 892/12323 [1:30:42<19:22:22,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 892/12323 [1:30:42<19:22:22,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Reconstructed fp32 state dict with 1926 params 1412678656 elements\r\n",
-      "Saving bf16 state dict to ../model/v5-L96-D1024-E0_1-mem-ctx-8k.pth\r\n"
+      "\r",
+      "Epoch 0:   7%| | 893/12323 [1:30:43<19:21:14,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 893/12323 [1:30:43<19:21:14,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-rw-r--r-- 1 root root 2.7G Sep  3 06:13 ../model/v5-L96-D1024-E0_1-mem-ctx-8k.pth\r\n"
+      "\r",
+      "Epoch 0:   7%| | 894/12323 [1:30:48<19:20:52,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 894/12323 [1:30:48<19:20:52,  6.09s/it, v_num=i2o7, train/loss="
      ]
-    }
-   ],
-   "source": [
-    "# Lets export the model from the checkpoint\n",
-    "!cd \"{TRAINER_DIR}\" && \\\n",
-    "    python export_checkpoint.py \\\n",
-    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/last.ckpt\" \\\n",
-    "        \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"bf16\"\n",
-    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "3d2ac060",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-03T06:13:26.670520Z",
-     "iopub.status.busy": "2023-09-03T06:13:26.669753Z",
-     "iopub.status.idle": "2023-09-03T06:15:13.803308Z",
-     "shell.execute_reply": "2023-09-03T06:15:13.802459Z"
-    },
-    "papermill": {
-     "duration": 107.982106,
-     "end_time": "2023-09-03T06:15:13.805045",
-     "exception": false,
-     "start_time": "2023-09-03T06:13:25.822939",
-     "status": "completed"
     },
-    "tags": []
-   },
-   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
+      "\r",
+      "Epoch 0:   7%| | 895/12323 [1:30:53<19:20:36,  6.09s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 895/12323 [1:30:53<19:20:36,  6.09s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+      "\r",
+      "Epoch 0:   7%| | 896/12323 [1:31:35<19:28:11,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 896/12323 [1:31:35<19:28:11,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "###\r\n",
-      "### Model validation start ###\r\n",
-      "###\r\n"
+      "\r",
+      "Epoch 0:   7%| | 897/12323 [1:31:38<19:27:16,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 897/12323 [1:31:38<19:27:16,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 5 tokens : 100.0% similarity, with 5 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 898/12323 [1:31:41<19:26:28,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 898/12323 [1:31:41<19:26:28,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 10 tokens : 100.0% similarity, with 10 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 899/12323 [1:31:48<19:26:39,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 899/12323 [1:31:48<19:26:39,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 15 tokens : 100.0% similarity, with 15 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 900/12323 [1:31:51<19:25:58,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 900/12323 [1:31:51<19:25:58,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 20 tokens : 100.0% similarity, with 20 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 901/12323 [1:31:57<19:25:42,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 901/12323 [1:31:57<19:25:42,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 25 tokens : 100.0% similarity, with 25 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 902/12323 [1:32:05<19:26:05,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 902/12323 [1:32:05<19:26:05,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 30 tokens : 100.0% similarity, with 30 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 903/12323 [1:32:09<19:25:30,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 903/12323 [1:32:09<19:25:30,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 35 tokens : 100.0% similarity, with 35 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 904/12323 [1:32:15<19:25:27,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 904/12323 [1:32:15<19:25:27,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 40 tokens : 100.0% similarity, with 40 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 905/12323 [1:32:19<19:24:52,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 905/12323 [1:32:19<19:24:53,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 45 tokens : 100.0% similarity, with 45 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 906/12323 [1:32:21<19:23:52,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 906/12323 [1:32:21<19:23:52,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 50 tokens : 100.0% similarity, with 50 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 907/12323 [1:32:30<19:24:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 907/12323 [1:32:30<19:24:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 55 tokens : 100.0% similarity, with 55 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 908/12323 [1:32:36<19:24:14,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 908/12323 [1:32:36<19:24:14,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 60 tokens : 100.0% similarity, with 60 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 909/12323 [1:32:41<19:23:53,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 909/12323 [1:32:41<19:23:53,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 65 tokens : 100.0% similarity, with 65 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 910/12323 [1:32:46<19:23:30,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 910/12323 [1:32:46<19:23:30,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 70 tokens : 100.0% similarity, with 70 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 911/12323 [1:32:50<19:22:55,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 911/12323 [1:32:50<19:22:55,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 75 tokens : 100.0% similarity, with 75 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 912/12323 [1:32:52<19:22:01,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 912/12323 [1:32:52<19:22:01,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 80 tokens : 100.0% similarity, with 80 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 913/12323 [1:32:59<19:22:11,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 913/12323 [1:32:59<19:22:11,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 85 tokens : 100.0% similarity, with 85 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 914/12323 [1:33:06<19:22:10,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 914/12323 [1:33:06<19:22:10,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 90 tokens : 100.0% similarity, with 90 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 915/12323 [1:33:14<19:22:32,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 915/12323 [1:33:14<19:22:32,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 95 tokens : 100.0% similarity, with 95 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 916/12323 [1:33:20<19:22:18,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 916/12323 [1:33:20<19:22:18,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 100 tokens : 100.0% similarity, with 100 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 917/12323 [1:33:23<19:21:43,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 917/12323 [1:33:23<19:21:43,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 105 tokens : 100.0% similarity, with 105 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 918/12323 [1:33:32<19:22:07,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 918/12323 [1:33:32<19:22:07,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 110 tokens : 100.0% similarity, with 110 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 919/12323 [1:33:37<19:21:45,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 919/12323 [1:33:37<19:21:45,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 115 tokens : 100.0% similarity, with 115 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 920/12323 [1:33:40<19:21:04,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 920/12323 [1:33:40<19:21:04,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 120 tokens : 100.0% similarity, with 120 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 921/12323 [1:33:45<19:20:42,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 921/12323 [1:33:45<19:20:42,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 125 tokens : 100.0% similarity, with 125 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 922/12323 [1:33:48<19:19:56,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 922/12323 [1:33:48<19:19:56,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 130 tokens : 100.0% similarity, with 130 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 923/12323 [1:33:55<19:20:08,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 923/12323 [1:33:55<19:20:08,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 135 tokens : 100.0% similarity, with 135 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   7%| | 924/12323 [1:34:04<19:20:30,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   7%| | 924/12323 [1:34:04<19:20:30,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 140 tokens : 100.0% similarity, with 140 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 925/12323 [1:34:09<19:20:10,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 925/12323 [1:34:09<19:20:10,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 145 tokens : 100.0% similarity, with 145 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 926/12323 [1:34:15<19:20:07,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 926/12323 [1:34:15<19:20:07,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 150 tokens : 100.0% similarity, with 150 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 927/12323 [1:34:21<19:20:04,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 927/12323 [1:34:21<19:20:04,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 160 tokens : 100.0% similarity, with 160 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 928/12323 [1:34:43<19:23:02,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 928/12323 [1:34:43<19:23:02,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 170 tokens : 100.0% similarity, with 170 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 929/12323 [1:34:45<19:22:09,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 929/12323 [1:34:45<19:22:09,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 180 tokens : 100.0% similarity, with 180 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 930/12323 [1:34:49<19:21:36,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 930/12323 [1:34:49<19:21:36,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 190 tokens : 100.0% similarity, with 190 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 931/12323 [1:34:56<19:21:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 931/12323 [1:34:56<19:21:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 200 tokens : 99.5% similarity, with 199 matched token, and 1 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 932/12323 [1:35:03<19:21:46,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 932/12323 [1:35:03<19:21:46,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 210 tokens : 99.52380952380952% similarity, with 209 matched token, and 1 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 933/12323 [1:35:07<19:21:12,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 933/12323 [1:35:07<19:21:12,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 220 tokens : 99.54545454545455% similarity, with 219 matched token, and 1 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 934/12323 [1:35:14<19:21:23,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 934/12323 [1:35:14<19:21:23,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 230 tokens : 100.0% similarity, with 230 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 935/12323 [1:35:23<19:21:45,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 935/12323 [1:35:23<19:21:45,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 240 tokens : 100.0% similarity, with 240 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 936/12323 [1:35:24<19:20:40,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 936/12323 [1:35:24<19:20:40,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 250 tokens : 99.6% similarity, with 249 matched token, and 1 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 937/12323 [1:35:29<19:20:20,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 937/12323 [1:35:29<19:20:20,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 260 tokens : 99.61538461538461% similarity, with 259 matched token, and 1 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 938/12323 [1:35:36<19:20:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 938/12323 [1:35:36<19:20:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 270 tokens : 99.62962962962963% similarity, with 269 matched token, and 1 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 939/12323 [1:35:43<19:20:28,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 939/12323 [1:35:43<19:20:28,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 280 tokens : 100.0% similarity, with 280 matched token, and 0 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 940/12323 [1:35:49<19:20:24,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 940/12323 [1:35:49<19:20:24,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 290 tokens : 99.6551724137931% similarity, with 289 matched token, and 1 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 941/12323 [1:35:54<19:20:02,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 941/12323 [1:35:54<19:20:02,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 300 tokens : 99.66666666666667% similarity, with 299 matched token, and 1 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 942/12323 [1:35:59<19:19:39,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 942/12323 [1:35:59<19:19:39,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 325 tokens : 98.76923076923076% similarity, with 321 matched token, and 4 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 943/12323 [1:36:02<19:19:05,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 943/12323 [1:36:02<19:19:05,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 350 tokens : 98.85714285714286% similarity, with 346 matched token, and 4 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 944/12323 [1:36:08<19:18:49,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 944/12323 [1:36:08<19:18:49,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 375 tokens : 98.66666666666667% similarity, with 370 matched token, and 5 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 945/12323 [1:36:14<19:18:46,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 945/12323 [1:36:14<19:18:46,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 400 tokens : 98.75% similarity, with 395 matched token, and 5 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 946/12323 [1:36:15<19:17:42,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 946/12323 [1:36:15<19:17:42,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 425 tokens : 98.82352941176471% similarity, with 420 matched token, and 5 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 947/12323 [1:36:24<19:18:02,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 947/12323 [1:36:24<19:18:02,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 450 tokens : 98.88888888888889% similarity, with 445 matched token, and 5 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 948/12323 [1:36:32<19:18:23,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 948/12323 [1:36:32<19:18:23,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 475 tokens : 98.52631578947368% similarity, with 468 matched token, and 7 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 949/12323 [1:36:40<19:18:43,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 949/12323 [1:36:40<19:18:43,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 500 tokens : 97.8% similarity, with 489 matched token, and 11 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 950/12323 [1:36:47<19:18:40,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 950/12323 [1:36:47<19:18:40,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 525 tokens : 97.90476190476191% similarity, with 514 matched token, and 11 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 951/12323 [1:36:49<19:17:53,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 951/12323 [1:36:49<19:17:53,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 550 tokens : 97.81818181818181% similarity, with 538 matched token, and 12 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 952/12323 [1:36:55<19:17:38,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 952/12323 [1:36:55<19:17:38,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 575 tokens : 97.3913043478261% similarity, with 560 matched token, and 15 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 953/12323 [1:36:59<19:17:10,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 953/12323 [1:36:59<19:17:10,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 600 tokens : 97.33333333333334% similarity, with 584 matched token, and 16 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 954/12323 [1:37:06<19:17:20,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 954/12323 [1:37:06<19:17:20,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 625 tokens : 97.6% similarity, with 610 matched token, and 15 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 955/12323 [1:37:10<19:16:47,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 955/12323 [1:37:10<19:16:47,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 650 tokens : 97.23076923076923% similarity, with 632 matched token, and 18 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 956/12323 [1:37:12<19:15:49,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 956/12323 [1:37:12<19:15:49,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 675 tokens : 97.48148148148148% similarity, with 658 matched token, and 17 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 957/12323 [1:37:16<19:15:17,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 957/12323 [1:37:16<19:15:17,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 700 tokens : 97.42857142857143% similarity, with 682 matched token, and 18 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 958/12323 [1:37:24<19:15:39,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 750 tokens : 97.73333333333333% similarity, with 733 matched token, and 17 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 958/12323 [1:37:24<19:15:39,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 800 tokens : 97.5% similarity, with 780 matched token, and 20 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 959/12323 [1:37:26<19:14:36,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 959/12323 [1:37:26<19:14:36,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 850 tokens : 97.29411764705883% similarity, with 827 matched token, and 23 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 960/12323 [1:37:54<19:18:49,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 960/12323 [1:37:54<19:18:49,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 900 tokens : 97.55555555555556% similarity, with 878 matched token, and 22 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 961/12323 [1:37:58<19:18:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 961/12323 [1:37:58<19:18:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 950 tokens : 97.36842105263158% similarity, with 925 matched token, and 25 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 962/12323 [1:38:06<19:18:37,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 962/12323 [1:38:06<19:18:37,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1000 tokens : 97.5% similarity, with 975 matched token, and 25 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 963/12323 [1:38:12<19:18:34,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 963/12323 [1:38:12<19:18:34,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "###\r\n",
-      "### Model validation end ###\r\n",
-      "###\r\n"
+      "\r",
+      "Epoch 0:   8%| | 964/12323 [1:38:21<19:18:53,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 964/12323 [1:38:21<19:18:53,  6.12s/it, v_num=i2o7, train/loss="
      ]
-    }
-   ],
-   "source": [
-    "# Lets do a quick memory test\n",
-    "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
-    "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "1300c5e3",
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2023-09-03T06:15:15.579446Z",
-     "iopub.status.busy": "2023-09-03T06:15:15.579184Z",
-     "iopub.status.idle": "2023-09-03T06:19:23.422747Z",
-     "shell.execute_reply": "2023-09-03T06:19:23.422044Z"
     },
-    "papermill": {
-     "duration": 248.769474,
-     "end_time": "2023-09-03T06:19:23.424598",
-     "exception": false,
-     "start_time": "2023-09-03T06:15:14.655124",
-     "status": "completed"
-    },
-    "tags": []
-   },
-   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Setting ds_accelerator to cuda (auto detect)\r\n"
+      "\r",
+      "Epoch 0:   8%| | 965/12323 [1:38:28<19:19:02,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 965/12323 [1:38:28<19:19:02,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[RWKV.model] Running RWKV model using 'torch-jit' with torch '2.0.1+cu118'\r\n"
+      "\r",
+      "Epoch 0:   8%| | 966/12323 [1:38:33<19:18:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 966/12323 [1:38:33<19:18:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "###\r\n",
-      "### Model validation start ###\r\n",
-      "###\r\n"
+      "\r",
+      "Epoch 0:   8%| | 967/12323 [1:38:35<19:17:45,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 967/12323 [1:38:35<19:17:45,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1000 tokens : 97.5% similarity, with 975 matched token, and 25 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 968/12323 [1:38:43<19:18:07,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 968/12323 [1:38:43<19:18:07,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1050 tokens : 97.23809523809524% similarity, with 1021 matched token, and 29 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 969/12323 [1:38:51<19:18:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 969/12323 [1:38:51<19:18:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1100 tokens : 97.0% similarity, with 1067 matched token, and 33 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 970/12323 [1:38:56<19:18:00,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 970/12323 [1:38:56<19:18:00,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1150 tokens : 97.21739130434783% similarity, with 1118 matched token, and 32 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 971/12323 [1:39:03<19:18:07,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 971/12323 [1:39:03<19:18:07,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1200 tokens : 97.25% similarity, with 1167 matched token, and 33 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 972/12323 [1:39:11<19:18:27,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 972/12323 [1:39:11<19:18:27,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1250 tokens : 97.04% similarity, with 1213 matched token, and 37 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 973/12323 [1:39:20<19:18:46,  6.13s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 973/12323 [1:39:20<19:18:46,  6.13s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1300 tokens : 96.76923076923077% similarity, with 1258 matched token, and 42 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 974/12323 [1:39:25<19:18:31,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 974/12323 [1:39:25<19:18:31,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1350 tokens : 96.22222222222221% similarity, with 1299 matched token, and 51 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 975/12323 [1:39:28<19:17:52,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 975/12323 [1:39:28<19:17:52,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1400 tokens : 96.14285714285714% similarity, with 1346 matched token, and 54 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 976/12323 [1:39:35<19:17:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 976/12323 [1:39:35<19:17:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1450 tokens : 96.13793103448276% similarity, with 1394 matched token, and 56 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 977/12323 [1:39:43<19:18:09,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 977/12323 [1:39:43<19:18:09,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1500 tokens : 95.53333333333333% similarity, with 1433 matched token, and 67 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 978/12323 [1:39:50<19:18:05,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 978/12323 [1:39:50<19:18:05,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1550 tokens : 95.22580645161291% similarity, with 1476 matched token, and 74 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 979/12323 [1:39:55<19:17:50,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 979/12323 [1:39:55<19:17:50,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1600 tokens : 95.1875% similarity, with 1523 matched token, and 77 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 980/12323 [1:39:56<19:16:48,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 980/12323 [1:39:56<19:16:48,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1650 tokens : 94.48484848484848% similarity, with 1559 matched token, and 91 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 981/12323 [1:39:57<19:15:46,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 981/12323 [1:39:57<19:15:46,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1700 tokens : 93.94117647058823% similarity, with 1597 matched token, and 103 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 982/12323 [1:40:04<19:15:42,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 982/12323 [1:40:04<19:15:42,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1750 tokens : 93.54285714285714% similarity, with 1637 matched token, and 113 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 983/12323 [1:40:11<19:15:50,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 983/12323 [1:40:11<19:15:50,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1800 tokens : 92.77777777777779% similarity, with 1670 matched token, and 130 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 984/12323 [1:40:14<19:15:11,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 984/12323 [1:40:14<19:15:11,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1850 tokens : 92.10810810810811% similarity, with 1704 matched token, and 146 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 985/12323 [1:40:17<19:14:26,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 985/12323 [1:40:17<19:14:26,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1900 tokens : 91.3157894736842% similarity, with 1735 matched token, and 165 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 986/12323 [1:40:23<19:14:23,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 986/12323 [1:40:23<19:14:23,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 1950 tokens : 90.71794871794872% similarity, with 1769 matched token, and 181 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 987/12323 [1:40:31<19:14:31,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 987/12323 [1:40:31<19:14:31,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2000 tokens : 89.4% similarity, with 1788 matched token, and 212 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 988/12323 [1:40:34<19:13:46,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 988/12323 [1:40:34<19:13:46,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2050 tokens : 86.63414634146342% similarity, with 1776 matched token, and 274 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 989/12323 [1:40:35<19:12:51,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 989/12323 [1:40:35<19:12:51,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2100 tokens : 84.80952380952381% similarity, with 1781 matched token, and 319 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 990/12323 [1:40:41<19:12:35,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 990/12323 [1:40:41<19:12:35,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2150 tokens : 83.16279069767441% similarity, with 1788 matched token, and 362 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 991/12323 [1:40:49<19:12:54,  6.10s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 991/12323 [1:40:49<19:12:54,  6.10s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2200 tokens : 80.77272727272728% similarity, with 1777 matched token, and 423 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 992/12323 [1:41:05<19:14:47,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 992/12323 [1:41:05<19:14:47,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2250 tokens : 78.26666666666667% similarity, with 1761 matched token, and 489 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 993/12323 [1:41:13<19:14:56,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 993/12323 [1:41:13<19:14:56,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2300 tokens : 75.8695652173913% similarity, with 1745 matched token, and 555 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 994/12323 [1:41:21<19:15:16,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 994/12323 [1:41:21<19:15:16,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2350 tokens : 74.34042553191489% similarity, with 1747 matched token, and 603 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 995/12323 [1:41:29<19:15:24,  6.12s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 995/12323 [1:41:29<19:15:24,  6.12s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2400 tokens : 72.29166666666667% similarity, with 1735 matched token, and 665 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 996/12323 [1:41:30<19:14:23,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 996/12323 [1:41:30<19:14:23,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2450 tokens : 70.61224489795919% similarity, with 1730 matched token, and 720 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 997/12323 [1:41:35<19:14:09,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 997/12323 [1:41:35<19:14:09,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2500 tokens : 68.84% similarity, with 1721 matched token, and 779 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 998/12323 [1:41:42<19:14:07,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 998/12323 [1:41:42<19:14:07,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2550 tokens : 66.94117647058823% similarity, with 1707 matched token, and 843 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 999/12323 [1:41:48<19:14:05,  6.11s/it, v_num=i2o7, train/loss=\r",
+      "Epoch 0:   8%| | 999/12323 [1:41:48<19:14:05,  6.11s/it, v_num=i2o7, train/loss="
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2600 tokens : 65.1923076923077% similarity, with 1695 matched token, and 905 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1000/12323 [1:41:55<19:14:01,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1000/12323 [1:41:55<19:14:01,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2650 tokens : 63.77358490566037% similarity, with 1690 matched token, and 960 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1001/12323 [1:42:02<19:14:11,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1001/12323 [1:42:02<19:14:11,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2700 tokens : 62.11111111111111% similarity, with 1677 matched token, and 1023 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1002/12323 [1:42:11<19:14:30,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1002/12323 [1:42:11<19:14:30,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2750 tokens : 60.54545454545455% similarity, with 1665 matched token, and 1085 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1003/12323 [1:42:13<19:13:41,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1003/12323 [1:42:13<19:13:41,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2800 tokens : 58.92857142857143% similarity, with 1650 matched token, and 1150 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1004/12323 [1:42:20<19:13:51,  6.12s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1004/12323 [1:42:20<19:13:51,  6.12s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2850 tokens : 57.78947368421052% similarity, with 1647 matched token, and 1203 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1005/12323 [1:42:23<19:13:02,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1005/12323 [1:42:23<19:13:02,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2900 tokens : 56.55172413793104% similarity, with 1640 matched token, and 1260 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1006/12323 [1:42:28<19:12:42,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1006/12323 [1:42:28<19:12:42,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 2950 tokens : 55.1864406779661% similarity, with 1628 matched token, and 1322 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1007/12323 [1:42:32<19:12:16,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1007/12323 [1:42:32<19:12:16,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3000 tokens : 53.833333333333336% similarity, with 1615 matched token, and 1385 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1008/12323 [1:42:36<19:11:50,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3050 tokens : 52.49180327868852% similarity, with 1601 matched token, and 1449 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1008/12323 [1:42:36<19:11:50,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3100 tokens : 51.41935483870967% similarity, with 1594 matched token, and 1506 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1009/12323 [1:42:45<19:12:11,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1009/12323 [1:42:45<19:12:11,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3150 tokens : 50.06349206349206% similarity, with 1577 matched token, and 1573 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1010/12323 [1:42:53<19:12:32,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1010/12323 [1:42:53<19:12:32,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3200 tokens : 48.6875% similarity, with 1558 matched token, and 1642 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1011/12323 [1:43:00<19:12:29,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1011/12323 [1:43:00<19:12:29,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3250 tokens : 47.63076923076923% similarity, with 1548 matched token, and 1702 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1012/12323 [1:43:03<19:11:52,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1012/12323 [1:43:03<19:11:52,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3300 tokens : 46.484848484848484% similarity, with 1534 matched token, and 1766 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1013/12323 [1:43:10<19:11:50,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1013/12323 [1:43:10<19:11:50,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3350 tokens : 44.80597014925373% similarity, with 1501 matched token, and 1849 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1014/12323 [1:43:14<19:11:30,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1014/12323 [1:43:14<19:11:30,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3400 tokens : 43.29411764705882% similarity, with 1472 matched token, and 1928 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1015/12323 [1:43:16<19:10:36,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1015/12323 [1:43:16<19:10:36,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3450 tokens : 42.08695652173913% similarity, with 1452 matched token, and 1998 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1016/12323 [1:43:19<19:09:53,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1016/12323 [1:43:19<19:09:53,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3500 tokens : 40.74285714285714% similarity, with 1426 matched token, and 2074 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1017/12323 [1:43:26<19:10:02,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1017/12323 [1:43:26<19:10:02,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3550 tokens : 39.38028169014085% similarity, with 1398 matched token, and 2152 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1018/12323 [1:43:31<19:09:43,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1018/12323 [1:43:31<19:09:43,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3600 tokens : 37.94444444444444% similarity, with 1366 matched token, and 2234 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1019/12323 [1:43:35<19:09:12,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1019/12323 [1:43:35<19:09:12,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3650 tokens : 36.65753424657534% similarity, with 1338 matched token, and 2312 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1020/12323 [1:43:44<19:09:33,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1020/12323 [1:43:44<19:09:33,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3700 tokens : 35.43243243243243% similarity, with 1311 matched token, and 2389 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1021/12323 [1:43:50<19:09:31,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1021/12323 [1:43:50<19:09:31,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3750 tokens : 34.026666666666664% similarity, with 1276 matched token, and 2474 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1022/12323 [1:43:58<19:09:40,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1022/12323 [1:43:58<19:09:40,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3800 tokens : 33.1578947368421% similarity, with 1260 matched token, and 2540 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1023/12323 [1:44:03<19:09:19,  6.10s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1023/12323 [1:44:03<19:09:19,  6.10s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3850 tokens : 31.896103896103895% similarity, with 1228 matched token, and 2622 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1024/12323 [1:44:21<19:11:27,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1024/12323 [1:44:21<19:11:27,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3900 tokens : 30.538461538461537% similarity, with 1191 matched token, and 2709 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1025/12323 [1:44:23<19:10:33,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1025/12323 [1:44:23<19:10:33,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 3950 tokens : 29.443037974683545% similarity, with 1163 matched token, and 2787 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1026/12323 [1:44:27<19:10:07,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1026/12323 [1:44:27<19:10:07,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "## Model validation for 4000 tokens : 28.225% similarity, with 1129 matched token, and 2871 token mismatch\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1027/12323 [1:44:32<19:09:52,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1027/12323 [1:44:32<19:09:52,  6.11s/it, v_num=i2o7, train/loss"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "###\r\n",
-      "### Model validation end ###\r\n",
-      "###\r\n"
+      "\r",
+      "Epoch 0:   8%| | 1028/12323 [1:44:40<19:10:09,  6.11s/it, v_num=i2o7, train/loss\r",
+      "Epoch 0:   8%| | 1028/12323 [1:44:40<19:10:09,  6.11s/it, v_num=i2o7, train/loss"
      ]
     }
    ],
+   "source": [
+    "# Start the finetune model training\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "    python lightning_trainer.py fit \\\n",
+    "        -c \"{NOTEBOOK_DIR}/config-mem-template.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-8k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/\" \\\n",
+    "        --model.lr_init=3e-4 \\\n",
+    "        --model.lr_final=1e-4 \\\n",
+    "        --data.max_token_size=8192 \\\n",
+    "        --model.ctx_len=4096 \\\n",
+    "        --model.bptt_learning_range=2 \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-4k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "065aea13",
+   "metadata": {
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Let's export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-8k/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e497d3dc",
+   "metadata": {
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Let's do a quick memory test\n",
+    "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
+    "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e5ae95c7",
+   "metadata": {
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "outputs": [],
    "source": [
     "!export RWKV_WAVENET_LAYERS=\"{RWKV_WAVENET_LAYERS}\" && \\\n",
     "        python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-8k.pth\" \"none\" 1000 4000"
@@ -138874,14 +35234,14 @@
   },
   "papermill": {
    "default_parameters": {},
-   "duration": 77896.719256,
-   "end_time": "2023-09-03T06:19:24.673187",
+   "duration": null,
+   "end_time": null,
    "environment_variables": {},
    "exception": null,
    "input_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb",
    "output_path": "/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/output/experiment/rwkv-x-exp/v5-memory/v5-L96-D1024-E1e-1-ctx4k-part5.ipynb",
    "parameters": {},
-   "start_time": "2023-09-02T08:41:07.953931",
+   "start_time": "2023-09-02T06:16:27.986890",
    "version": "2.4.0"
   }
  },